diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 00a963c4..1e1724af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,8 @@ jobs: erlang: "23.0" steps: - uses: actions/checkout@v1.0.0 + with: + submodules: 'recursive' - name: Install dependencies run: |- apt-get update @@ -47,9 +49,4 @@ jobs: - name: Run tests run: |- - mix test.$PARSER - - - name: Run inch.report - run: |- - mix deps.get --only docs - MIX_ENV=docs mix inch.report + MIX_ENV=test mix test.$PARSER diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..6ce64b29 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "test/html5lib-tests"] + path = test/html5lib-tests + url = https://github.com/html5lib/html5lib-tests.git diff --git a/CHANGELOG.md b/CHANGELOG.md index ba785f77..e0f09493 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased][unreleased] +## [0.32.0] - 2021-10-18 + +### Added + +- Add an HTML tokenizer written in Elixir - this is still experimental and its API is not stable yet. +- Add support for HTML IDs containing periods in the selectors - thanks [@Hugo-Hache](https://github.com/Hugo-Hache) +- Add support for case-insensitive CSS attribute selectors - thanks [@fcapovilla](https://github.com/fcapovilla) +- Add the `:root` pseudo-class selector - thanks [@fcapovilla](https://github.com/fcapovilla) + ## [0.31.0] - 2021-06-11 ### Changed @@ -581,7 +590,8 @@ of the parent element inside HTML. - Elixir version requirement from "~> 1.0.0" to ">= 1.0.0". -[unreleased]: https://github.com/philss/floki/compare/v0.31.0...HEAD +[unreleased]: https://github.com/philss/floki/compare/v0.32.0...HEAD +[0.32.0]: https://github.com/philss/floki/compare/v0.31.0...v0.32.0 [0.31.0]: https://github.com/philss/floki/compare/v0.30.1...v0.31.0 [0.30.1]: https://github.com/philss/floki/compare/v0.30.0...v0.30.1 [0.30.0]: https://github.com/philss/floki/compare/v0.29.0...v0.30.0 diff --git a/README.md b/README.md index b562a54c..d6d62d55 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ [![Actions Status](https://github.com/philss/floki/workflows/CI/badge.svg?branch=master)](https://github.com/philss/floki/actions) [![Floki version](https://img.shields.io/hexpm/v/floki.svg)](https://hex.pm/packages/floki) [![Hex Docs](https://img.shields.io/badge/hex-docs-lightgreen.svg)](https://hexdocs.pm/floki/) -[![Inline docs](https://inch-ci.org/github/philss/floki.svg?branch=master)](https://inch-ci.org/github/philss/floki) [![Hex.pm](https://img.shields.io/hexpm/dt/floki.svg)](https://hex.pm/packages/floki) [![License](https://img.shields.io/hexpm/l/floki.svg)](https://github.com/philss/floki/blob/master/LICENSE) [![Last Updated](https://img.shields.io/github/last-commit/philss/floki.svg)](https://github.com/philss/floki/commits/master) @@ -62,7 +61,7 @@ Add Floki to your `mix.exs`: ```elixir defp deps do [ - {:floki, "~> 0.31.0"} + {:floki, "~> 0.32.0"} ] end ``` @@ -121,8 +120,8 @@ After Rust is set up, you need to add `html5ever` NIF to your dependency list: ```elixir defp deps do [ - {:floki, "~> 0.31.0"}, - {:html5ever, "~> 0.8.0"} + {:floki, "~> 0.32.0"}, + {:html5ever, "~> 0.9.0"} ] end ``` @@ -149,7 +148,7 @@ First, add `fast_html` to your dependencies: ```elixir defp deps do [ - {:floki, "~> 0.31.0"}, + {:floki, "~> 0.32.0"}, {:fast_html, "~> 2.0"} ] end ``` @@ -250,8 +249,9 @@ Here you find all the [CSS 
selectors](https://www.w3.org/TR/selectors/#selectors) | E:checked | An E element (checkbox, radio, or option) that is checked | | E:disabled | An E element (button, input, select, textarea, or option) that is disabled | | E.warning | an E element whose class is "warning" | -| E#myid | an E element with ID equal to "myid" | +| E#myid | an E element with ID equal to "myid" (for ids containing periods, use `#my\\.id` or `[id="my.id"]`) | | E:not(s) | an E element that does not match simple selector s | +| :root | the root node or nodes (in case of fragments) of the document. Most of the time this is the `html` tag | | E F | an F element descendant of an E element | | E > F | an F element child of an E element | | E + F | an F element immediately preceded by an E element | diff --git a/benchs/tokenizers.exs b/benchs/tokenizers.exs new file mode 100644 index 00000000..7b492657 --- /dev/null +++ b/benchs/tokenizers.exs @@ -0,0 +1,34 @@ +# This benchmark compares the tokenizer implementation +# from mochiweb with the brand new one from Floki. +# In order to run this, you first need to extract the +# HTML files using the "extract.sh" script: +# +# ./extract.sh +# +# After that, you need to run it like this: +# +# mix run benchs/tokenizers.exs +# + +read_file = fn name -> + __ENV__.file +  |> Path.dirname() + |> Path.join(name) + |> File.read!() +end + +inputs = %{ + "big" => read_file.("big.html"), + "medium" => read_file.("medium.html"), + "small" => read_file.("small.html") +} + +Benchee.run( + %{ + "mochiweb" => fn input -> :floki_mochi_html.tokens(input) end, + "floki" => fn input -> Floki.HTML.Tokenizer.tokenize(input) end + }, + time: 20, + inputs: inputs, + memory_time: 4 +) diff --git a/lib/floki/entities.ex b/lib/floki/entities.ex new file mode 100644 index 00000000..2e1e1a0b --- /dev/null +++ b/lib/floki/entities.ex @@ -0,0 +1,2242 @@ +defmodule Floki.Entities do + # This file was generated by "Mix.Tasks.GenerateEntities" + + @moduledoc false + + @doc """ + Returns unicode codepoints for a given HTML entity. 
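+
+  Example (an illustrative sketch, assuming the generated clauses below):
+
+      iex> Floki.Entities.get("Æ")
+      [198]
+      iex> List.to_string(Floki.Entities.get("Æ"))
+      "Æ"
+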
+ """ + @spec get(binary()) :: list(integer) + def get("Æ"), do: [198] + def get("Æ"), do: [198] + def get("&"), do: [38] + def get("&"), do: [38] + def get("Á"), do: [193] + def get("Á"), do: [193] + def get("Ă"), do: [258] + def get("Â"), do: [194] + def get("Â"), do: [194] + def get("А"), do: [1040] + def get("𝔄"), do: [120_068] + def get("À"), do: [192] + def get("À"), do: [192] + def get("Α"), do: [913] + def get("Ā"), do: [256] + def get("⩓"), do: [10835] + def get("Ą"), do: [260] + def get("𝔸"), do: [120_120] + def get("⁡"), do: [8289] + def get("Å"), do: [197] + def get("Å"), do: [197] + def get("𝒜"), do: [119_964] + def get("≔"), do: [8788] + def get("Ã"), do: [195] + def get("Ã"), do: [195] + def get("Ä"), do: [196] + def get("Ä"), do: [196] + def get("∖"), do: [8726] + def get("⫧"), do: [10983] + def get("⌆"), do: [8966] + def get("Б"), do: [1041] + def get("∵"), do: [8757] + def get("ℬ"), do: [8492] + def get("Β"), do: [914] + def get("𝔅"), do: [120_069] + def get("𝔹"), do: [120_121] + def get("˘"), do: [728] + def get("ℬ"), do: [8492] + def get("≎"), do: [8782] + def get("Ч"), do: [1063] + def get("©"), do: [169] + def get("©"), do: [169] + def get("Ć"), do: [262] + def get("⋒"), do: [8914] + def get("ⅅ"), do: [8517] + def get("ℭ"), do: [8493] + def get("Č"), do: [268] + def get("Ç"), do: [199] + def get("Ç"), do: [199] + def get("Ĉ"), do: [264] + def get("∰"), do: [8752] + def get("Ċ"), do: [266] + def get("¸"), do: [184] + def get("·"), do: [183] + def get("ℭ"), do: [8493] + def get("Χ"), do: [935] + def get("⊙"), do: [8857] + def get("⊖"), do: [8854] + def get("⊕"), do: [8853] + def get("⊗"), do: [8855] + def get("∲"), do: [8754] + def get("”"), do: [8221] + def get("’"), do: [8217] + def get("∷"), do: [8759] + def get("⩴"), do: [10868] + def get("≡"), do: [8801] + def get("∯"), do: [8751] + def get("∮"), do: [8750] + def get("ℂ"), do: [8450] + def get("∐"), do: [8720] + def get("∳"), do: [8755] + def get("⨯"), do: [10799] + def get("𝒞"), do: [119_966] + def get("⋓"), do: [8915] + def get("≍"), do: [8781] + def get("ⅅ"), do: [8517] + def get("⤑"), do: [10513] + def get("Ђ"), do: [1026] + def get("Ѕ"), do: [1029] + def get("Џ"), do: [1039] + def get("‡"), do: [8225] + def get("↡"), do: [8609] + def get("⫤"), do: [10980] + def get("Ď"), do: [270] + def get("Д"), do: [1044] + def get("∇"), do: [8711] + def get("Δ"), do: [916] + def get("𝔇"), do: [120_071] + def get("´"), do: [180] + def get("˙"), do: [729] + def get("˝"), do: [733] + def get("`"), do: [96] + def get("˜"), do: [732] + def get("⋄"), do: [8900] + def get("ⅆ"), do: [8518] + def get("𝔻"), do: [120_123] + def get("¨"), do: [168] + def get("⃜"), do: [8412] + def get("≐"), do: [8784] + def get("∯"), do: [8751] + def get("¨"), do: [168] + def get("⇓"), do: [8659] + def get("⇐"), do: [8656] + def get("⇔"), do: [8660] + def get("⫤"), do: [10980] + def get("⟸"), do: [10232] + def get("⟺"), do: [10234] + def get("⟹"), do: [10233] + def get("⇒"), do: [8658] + def get("⊨"), do: [8872] + def get("⇑"), do: [8657] + def get("⇕"), do: [8661] + def get("∥"), do: [8741] + def get("↓"), do: [8595] + def get("⤓"), do: [10515] + def get("⇵"), do: [8693] + def get("̑"), do: [785] + def get("⥐"), do: [10576] + def get("⥞"), do: [10590] + def get("↽"), do: [8637] + def get("⥖"), do: [10582] + def get("⥟"), do: [10591] + def get("⇁"), do: [8641] + def get("⥗"), do: [10583] + def get("⊤"), do: [8868] + def get("↧"), do: [8615] + def get("⇓"), do: [8659] + def get("𝒟"), do: [119_967] + def get("Đ"), do: [272] + def get("Ŋ"), do: [330] + 
def get("Ð"), do: [208] + def get("Ð"), do: [208] + def get("É"), do: [201] + def get("É"), do: [201] + def get("Ě"), do: [282] + def get("Ê"), do: [202] + def get("Ê"), do: [202] + def get("Э"), do: [1069] + def get("Ė"), do: [278] + def get("𝔈"), do: [120_072] + def get("È"), do: [200] + def get("È"), do: [200] + def get("∈"), do: [8712] + def get("Ē"), do: [274] + def get("◻"), do: [9723] + def get("▫"), do: [9643] + def get("Ę"), do: [280] + def get("𝔼"), do: [120_124] + def get("Ε"), do: [917] + def get("⩵"), do: [10869] + def get("≂"), do: [8770] + def get("⇌"), do: [8652] + def get("ℰ"), do: [8496] + def get("⩳"), do: [10867] + def get("Η"), do: [919] + def get("Ë"), do: [203] + def get("Ë"), do: [203] + def get("∃"), do: [8707] + def get("ⅇ"), do: [8519] + def get("Ф"), do: [1060] + def get("𝔉"), do: [120_073] + def get("◼"), do: [9724] + def get("▪"), do: [9642] + def get("𝔽"), do: [120_125] + def get("∀"), do: [8704] + def get("ℱ"), do: [8497] + def get("ℱ"), do: [8497] + def get("Ѓ"), do: [1027] + def get(">"), do: [62] + def get(">"), do: [62] + def get("Γ"), do: [915] + def get("Ϝ"), do: [988] + def get("Ğ"), do: [286] + def get("Ģ"), do: [290] + def get("Ĝ"), do: [284] + def get("Г"), do: [1043] + def get("Ġ"), do: [288] + def get("𝔊"), do: [120_074] + def get("⋙"), do: [8921] + def get("𝔾"), do: [120_126] + def get("≥"), do: [8805] + def get("⋛"), do: [8923] + def get("≧"), do: [8807] + def get("⪢"), do: [10914] + def get("≷"), do: [8823] + def get("⩾"), do: [10878] + def get("≳"), do: [8819] + def get("𝒢"), do: [119_970] + def get("≫"), do: [8811] + def get("Ъ"), do: [1066] + def get("ˇ"), do: [711] + def get("^"), do: [94] + def get("Ĥ"), do: [292] + def get("ℌ"), do: [8460] + def get("ℋ"), do: [8459] + def get("ℍ"), do: [8461] + def get("─"), do: [9472] + def get("ℋ"), do: [8459] + def get("Ħ"), do: [294] + def get("≎"), do: [8782] + def get("≏"), do: [8783] + def get("Е"), do: [1045] + def get("IJ"), do: [306] + def get("Ё"), do: [1025] + def get("Í"), do: [205] + def get("Í"), do: [205] + def get("Î"), do: [206] + def get("Î"), do: [206] + def get("И"), do: [1048] + def get("İ"), do: [304] + def get("ℑ"), do: [8465] + def get("Ì"), do: [204] + def get("Ì"), do: [204] + def get("ℑ"), do: [8465] + def get("Ī"), do: [298] + def get("ⅈ"), do: [8520] + def get("⇒"), do: [8658] + def get("∬"), do: [8748] + def get("∫"), do: [8747] + def get("⋂"), do: [8898] + def get("⁣"), do: [8291] + def get("⁢"), do: [8290] + def get("Į"), do: [302] + def get("𝕀"), do: [120_128] + def get("Ι"), do: [921] + def get("ℐ"), do: [8464] + def get("Ĩ"), do: [296] + def get("І"), do: [1030] + def get("Ï"), do: [207] + def get("Ï"), do: [207] + def get("Ĵ"), do: [308] + def get("Й"), do: [1049] + def get("𝔍"), do: [120_077] + def get("𝕁"), do: [120_129] + def get("𝒥"), do: [119_973] + def get("Ј"), do: [1032] + def get("Є"), do: [1028] + def get("Х"), do: [1061] + def get("Ќ"), do: [1036] + def get("Κ"), do: [922] + def get("Ķ"), do: [310] + def get("К"), do: [1050] + def get("𝔎"), do: [120_078] + def get("𝕂"), do: [120_130] + def get("𝒦"), do: [119_974] + def get("Љ"), do: [1033] + def get("<"), do: [60] + def get("<"), do: [60] + def get("Ĺ"), do: [313] + def get("Λ"), do: [923] + def get("⟪"), do: [10218] + def get("ℒ"), do: [8466] + def get("↞"), do: [8606] + def get("Ľ"), do: [317] + def get("Ļ"), do: [315] + def get("Л"), do: [1051] + def get("⟨"), do: [10216] + def get("←"), do: [8592] + def get("⇤"), do: [8676] + def get("⇆"), do: [8646] + def get("⌈"), do: [8968] + def get("⟦"), do: 
[10214] + def get("⥡"), do: [10593] + def get("⇃"), do: [8643] + def get("⥙"), do: [10585] + def get("⌊"), do: [8970] + def get("↔"), do: [8596] + def get("⥎"), do: [10574] + def get("⊣"), do: [8867] + def get("↤"), do: [8612] + def get("⥚"), do: [10586] + def get("⊲"), do: [8882] + def get("⧏"), do: [10703] + def get("⊴"), do: [8884] + def get("⥑"), do: [10577] + def get("⥠"), do: [10592] + def get("↿"), do: [8639] + def get("⥘"), do: [10584] + def get("↼"), do: [8636] + def get("⥒"), do: [10578] + def get("⇐"), do: [8656] + def get("⇔"), do: [8660] + def get("⋚"), do: [8922] + def get("≦"), do: [8806] + def get("≶"), do: [8822] + def get("⪡"), do: [10913] + def get("⩽"), do: [10877] + def get("≲"), do: [8818] + def get("𝔏"), do: [120_079] + def get("⋘"), do: [8920] + def get("⇚"), do: [8666] + def get("Ŀ"), do: [319] + def get("⟵"), do: [10229] + def get("⟷"), do: [10231] + def get("⟶"), do: [10230] + def get("⟸"), do: [10232] + def get("⟺"), do: [10234] + def get("⟹"), do: [10233] + def get("𝕃"), do: [120_131] + def get("↙"), do: [8601] + def get("↘"), do: [8600] + def get("ℒ"), do: [8466] + def get("↰"), do: [8624] + def get("Ł"), do: [321] + def get("≪"), do: [8810] + def get("⤅"), do: [10501] + def get("М"), do: [1052] + def get(" "), do: [8287] + def get("ℳ"), do: [8499] + def get("𝔐"), do: [120_080] + def get("∓"), do: [8723] + def get("𝕄"), do: [120_132] + def get("ℳ"), do: [8499] + def get("Μ"), do: [924] + def get("Њ"), do: [1034] + def get("Ń"), do: [323] + def get("Ň"), do: [327] + def get("Ņ"), do: [325] + def get("Н"), do: [1053] + def get("​"), do: [8203] + def get("​"), do: [8203] + def get("​"), do: [8203] + def get("​"), do: [8203] + def get("≫"), do: [8811] + def get("≪"), do: [8810] + def get(" "), do: [10] + def get("𝔑"), do: [120_081] + def get("⁠"), do: [8288] + def get(" "), do: [160] + def get("ℕ"), do: [8469] + def get("⫬"), do: [10988] + def get("≢"), do: [8802] + def get("≭"), do: [8813] + def get("∦"), do: [8742] + def get("∉"), do: [8713] + def get("≠"), do: [8800] + def get("≂̸"), do: [8770, 824] + def get("∄"), do: [8708] + def get("≯"), do: [8815] + def get("≱"), do: [8817] + def get("≧̸"), do: [8807, 824] + def get("≫̸"), do: [8811, 824] + def get("≹"), do: [8825] + def get("⩾̸"), do: [10878, 824] + def get("≵"), do: [8821] + def get("≎̸"), do: [8782, 824] + def get("≏̸"), do: [8783, 824] + def get("⋪"), do: [8938] + def get("⧏̸"), do: [10703, 824] + def get("⋬"), do: [8940] + def get("≮"), do: [8814] + def get("≰"), do: [8816] + def get("≸"), do: [8824] + def get("≪̸"), do: [8810, 824] + def get("⩽̸"), do: [10877, 824] + def get("≴"), do: [8820] + def get("⪢̸"), do: [10914, 824] + def get("⪡̸"), do: [10913, 824] + def get("⊀"), do: [8832] + def get("⪯̸"), do: [10927, 824] + def get("⋠"), do: [8928] + def get("∌"), do: [8716] + def get("⋫"), do: [8939] + def get("⧐̸"), do: [10704, 824] + def get("⋭"), do: [8941] + def get("⊏̸"), do: [8847, 824] + def get("⋢"), do: [8930] + def get("⊐̸"), do: [8848, 824] + def get("⋣"), do: [8931] + def get("⊂⃒"), do: [8834, 8402] + def get("⊈"), do: [8840] + def get("⊁"), do: [8833] + def get("⪰̸"), do: [10928, 824] + def get("⋡"), do: [8929] + def get("≿̸"), do: [8831, 824] + def get("⊃⃒"), do: [8835, 8402] + def get("⊉"), do: [8841] + def get("≁"), do: [8769] + def get("≄"), do: [8772] + def get("≇"), do: [8775] + def get("≉"), do: [8777] + def get("∤"), do: [8740] + def get("𝒩"), do: [119_977] + def get("Ñ"), do: [209] + def get("Ñ"), do: [209] + def get("Ν"), do: [925] + def get("Œ"), do: [338] + def get("Ó"), do: 
[211] + def get("Ó"), do: [211] + def get("Ô"), do: [212] + def get("Ô"), do: [212] + def get("О"), do: [1054] + def get("Ő"), do: [336] + def get("𝔒"), do: [120_082] + def get("Ò"), do: [210] + def get("Ò"), do: [210] + def get("Ō"), do: [332] + def get("Ω"), do: [937] + def get("Ο"), do: [927] + def get("𝕆"), do: [120_134] + def get("“"), do: [8220] + def get("‘"), do: [8216] + def get("⩔"), do: [10836] + def get("𝒪"), do: [119_978] + def get("Ø"), do: [216] + def get("Ø"), do: [216] + def get("Õ"), do: [213] + def get("Õ"), do: [213] + def get("⨷"), do: [10807] + def get("Ö"), do: [214] + def get("Ö"), do: [214] + def get("‾"), do: [8254] + def get("⏞"), do: [9182] + def get("⎴"), do: [9140] + def get("⏜"), do: [9180] + def get("∂"), do: [8706] + def get("П"), do: [1055] + def get("𝔓"), do: [120_083] + def get("Φ"), do: [934] + def get("Π"), do: [928] + def get("±"), do: [177] + def get("ℌ"), do: [8460] + def get("ℙ"), do: [8473] + def get("⪻"), do: [10939] + def get("≺"), do: [8826] + def get("⪯"), do: [10927] + def get("≼"), do: [8828] + def get("≾"), do: [8830] + def get("″"), do: [8243] + def get("∏"), do: [8719] + def get("∷"), do: [8759] + def get("∝"), do: [8733] + def get("𝒫"), do: [119_979] + def get("Ψ"), do: [936] + def get("""), do: [34] + def get("""), do: [34] + def get("𝔔"), do: [120_084] + def get("ℚ"), do: [8474] + def get("𝒬"), do: [119_980] + def get("⤐"), do: [10512] + def get("®"), do: [174] + def get("®"), do: [174] + def get("Ŕ"), do: [340] + def get("⟫"), do: [10219] + def get("↠"), do: [8608] + def get("⤖"), do: [10518] + def get("Ř"), do: [344] + def get("Ŗ"), do: [342] + def get("Р"), do: [1056] + def get("ℜ"), do: [8476] + def get("∋"), do: [8715] + def get("⇋"), do: [8651] + def get("⥯"), do: [10607] + def get("ℜ"), do: [8476] + def get("Ρ"), do: [929] + def get("⟩"), do: [10217] + def get("→"), do: [8594] + def get("⇥"), do: [8677] + def get("⇄"), do: [8644] + def get("⌉"), do: [8969] + def get("⟧"), do: [10215] + def get("⥝"), do: [10589] + def get("⇂"), do: [8642] + def get("⥕"), do: [10581] + def get("⌋"), do: [8971] + def get("⊢"), do: [8866] + def get("↦"), do: [8614] + def get("⥛"), do: [10587] + def get("⊳"), do: [8883] + def get("⧐"), do: [10704] + def get("⊵"), do: [8885] + def get("⥏"), do: [10575] + def get("⥜"), do: [10588] + def get("↾"), do: [8638] + def get("⥔"), do: [10580] + def get("⇀"), do: [8640] + def get("⥓"), do: [10579] + def get("⇒"), do: [8658] + def get("ℝ"), do: [8477] + def get("⥰"), do: [10608] + def get("⇛"), do: [8667] + def get("ℛ"), do: [8475] + def get("↱"), do: [8625] + def get("⧴"), do: [10740] + def get("Щ"), do: [1065] + def get("Ш"), do: [1064] + def get("Ь"), do: [1068] + def get("Ś"), do: [346] + def get("⪼"), do: [10940] + def get("Š"), do: [352] + def get("Ş"), do: [350] + def get("Ŝ"), do: [348] + def get("С"), do: [1057] + def get("𝔖"), do: [120_086] + def get("↓"), do: [8595] + def get("←"), do: [8592] + def get("→"), do: [8594] + def get("↑"), do: [8593] + def get("Σ"), do: [931] + def get("∘"), do: [8728] + def get("𝕊"), do: [120_138] + def get("√"), do: [8730] + def get("□"), do: [9633] + def get("⊓"), do: [8851] + def get("⊏"), do: [8847] + def get("⊑"), do: [8849] + def get("⊐"), do: [8848] + def get("⊒"), do: [8850] + def get("⊔"), do: [8852] + def get("𝒮"), do: [119_982] + def get("⋆"), do: [8902] + def get("⋐"), do: [8912] + def get("⋐"), do: [8912] + def get("⊆"), do: [8838] + def get("≻"), do: [8827] + def get("⪰"), do: [10928] + def get("≽"), do: [8829] + def get("≿"), do: [8831] + def get("∋"), do: 
[8715] + def get("∑"), do: [8721] + def get("⋑"), do: [8913] + def get("⊃"), do: [8835] + def get("⊇"), do: [8839] + def get("⋑"), do: [8913] + def get("Þ"), do: [222] + def get("Þ"), do: [222] + def get("™"), do: [8482] + def get("Ћ"), do: [1035] + def get("Ц"), do: [1062] + def get(" "), do: [9] + def get("Τ"), do: [932] + def get("Ť"), do: [356] + def get("Ţ"), do: [354] + def get("Т"), do: [1058] + def get("𝔗"), do: [120_087] + def get("∴"), do: [8756] + def get("Θ"), do: [920] + def get("  "), do: [8287, 8202] + def get(" "), do: [8201] + def get("∼"), do: [8764] + def get("≃"), do: [8771] + def get("≅"), do: [8773] + def get("≈"), do: [8776] + def get("𝕋"), do: [120_139] + def get("⃛"), do: [8411] + def get("𝒯"), do: [119_983] + def get("Ŧ"), do: [358] + def get("Ú"), do: [218] + def get("Ú"), do: [218] + def get("↟"), do: [8607] + def get("⥉"), do: [10569] + def get("Ў"), do: [1038] + def get("Ŭ"), do: [364] + def get("Û"), do: [219] + def get("Û"), do: [219] + def get("У"), do: [1059] + def get("Ű"), do: [368] + def get("𝔘"), do: [120_088] + def get("Ù"), do: [217] + def get("Ù"), do: [217] + def get("Ū"), do: [362] + def get("_"), do: [95] + def get("⏟"), do: [9183] + def get("⎵"), do: [9141] + def get("⏝"), do: [9181] + def get("⋃"), do: [8899] + def get("⊎"), do: [8846] + def get("Ų"), do: [370] + def get("𝕌"), do: [120_140] + def get("↑"), do: [8593] + def get("⤒"), do: [10514] + def get("⇅"), do: [8645] + def get("↕"), do: [8597] + def get("⥮"), do: [10606] + def get("⊥"), do: [8869] + def get("↥"), do: [8613] + def get("⇑"), do: [8657] + def get("⇕"), do: [8661] + def get("↖"), do: [8598] + def get("↗"), do: [8599] + def get("ϒ"), do: [978] + def get("Υ"), do: [933] + def get("Ů"), do: [366] + def get("𝒰"), do: [119_984] + def get("Ũ"), do: [360] + def get("Ü"), do: [220] + def get("Ü"), do: [220] + def get("⊫"), do: [8875] + def get("⫫"), do: [10987] + def get("В"), do: [1042] + def get("⊩"), do: [8873] + def get("⫦"), do: [10982] + def get("⋁"), do: [8897] + def get("‖"), do: [8214] + def get("‖"), do: [8214] + def get("∣"), do: [8739] + def get("|"), do: [124] + def get("❘"), do: [10072] + def get("≀"), do: [8768] + def get(" "), do: [8202] + def get("𝔙"), do: [120_089] + def get("𝕍"), do: [120_141] + def get("𝒱"), do: [119_985] + def get("⊪"), do: [8874] + def get("Ŵ"), do: [372] + def get("⋀"), do: [8896] + def get("𝔚"), do: [120_090] + def get("𝕎"), do: [120_142] + def get("𝒲"), do: [119_986] + def get("𝔛"), do: [120_091] + def get("Ξ"), do: [926] + def get("𝕏"), do: [120_143] + def get("𝒳"), do: [119_987] + def get("Я"), do: [1071] + def get("Ї"), do: [1031] + def get("Ю"), do: [1070] + def get("Ý"), do: [221] + def get("Ý"), do: [221] + def get("Ŷ"), do: [374] + def get("Ы"), do: [1067] + def get("𝔜"), do: [120_092] + def get("𝕐"), do: [120_144] + def get("𝒴"), do: [119_988] + def get("Ÿ"), do: [376] + def get("Ж"), do: [1046] + def get("Ź"), do: [377] + def get("Ž"), do: [381] + def get("З"), do: [1047] + def get("Ż"), do: [379] + def get("​"), do: [8203] + def get("Ζ"), do: [918] + def get("ℨ"), do: [8488] + def get("ℤ"), do: [8484] + def get("𝒵"), do: [119_989] + def get("á"), do: [225] + def get("á"), do: [225] + def get("ă"), do: [259] + def get("∾"), do: [8766] + def get("∾̳"), do: [8766, 819] + def get("∿"), do: [8767] + def get("â"), do: [226] + def get("â"), do: [226] + def get("´"), do: [180] + def get("´"), do: [180] + def get("а"), do: [1072] + def get("æ"), do: [230] + def get("æ"), do: [230] + def get("⁡"), do: [8289] + def get("𝔞"), do: [120_094] + def 
get("à"), do: [224] + def get("à"), do: [224] + def get("ℵ"), do: [8501] + def get("ℵ"), do: [8501] + def get("α"), do: [945] + def get("ā"), do: [257] + def get("⨿"), do: [10815] + def get("&"), do: [38] + def get("&"), do: [38] + def get("∧"), do: [8743] + def get("⩕"), do: [10837] + def get("⩜"), do: [10844] + def get("⩘"), do: [10840] + def get("⩚"), do: [10842] + def get("∠"), do: [8736] + def get("⦤"), do: [10660] + def get("∠"), do: [8736] + def get("∡"), do: [8737] + def get("⦨"), do: [10664] + def get("⦩"), do: [10665] + def get("⦪"), do: [10666] + def get("⦫"), do: [10667] + def get("⦬"), do: [10668] + def get("⦭"), do: [10669] + def get("⦮"), do: [10670] + def get("⦯"), do: [10671] + def get("∟"), do: [8735] + def get("⊾"), do: [8894] + def get("⦝"), do: [10653] + def get("∢"), do: [8738] + def get("Å"), do: [197] + def get("⍼"), do: [9084] + def get("ą"), do: [261] + def get("𝕒"), do: [120_146] + def get("≈"), do: [8776] + def get("⩰"), do: [10864] + def get("⩯"), do: [10863] + def get("≊"), do: [8778] + def get("≋"), do: [8779] + def get("'"), do: [39] + def get("≈"), do: [8776] + def get("≊"), do: [8778] + def get("å"), do: [229] + def get("å"), do: [229] + def get("𝒶"), do: [119_990] + def get("*"), do: [42] + def get("≈"), do: [8776] + def get("≍"), do: [8781] + def get("ã"), do: [227] + def get("ã"), do: [227] + def get("ä"), do: [228] + def get("ä"), do: [228] + def get("∳"), do: [8755] + def get("⨑"), do: [10769] + def get("⫭"), do: [10989] + def get("≌"), do: [8780] + def get("϶"), do: [1014] + def get("‵"), do: [8245] + def get("∽"), do: [8765] + def get("⋍"), do: [8909] + def get("⊽"), do: [8893] + def get("⌅"), do: [8965] + def get("⌅"), do: [8965] + def get("⎵"), do: [9141] + def get("⎶"), do: [9142] + def get("≌"), do: [8780] + def get("б"), do: [1073] + def get("„"), do: [8222] + def get("∵"), do: [8757] + def get("∵"), do: [8757] + def get("⦰"), do: [10672] + def get("϶"), do: [1014] + def get("ℬ"), do: [8492] + def get("β"), do: [946] + def get("ℶ"), do: [8502] + def get("≬"), do: [8812] + def get("𝔟"), do: [120_095] + def get("⋂"), do: [8898] + def get("◯"), do: [9711] + def get("⋃"), do: [8899] + def get("⨀"), do: [10752] + def get("⨁"), do: [10753] + def get("⨂"), do: [10754] + def get("⨆"), do: [10758] + def get("★"), do: [9733] + def get("▽"), do: [9661] + def get("△"), do: [9651] + def get("⨄"), do: [10756] + def get("⋁"), do: [8897] + def get("⋀"), do: [8896] + def get("⤍"), do: [10509] + def get("⧫"), do: [10731] + def get("▪"), do: [9642] + def get("▴"), do: [9652] + def get("▾"), do: [9662] + def get("◂"), do: [9666] + def get("▸"), do: [9656] + def get("␣"), do: [9251] + def get("▒"), do: [9618] + def get("░"), do: [9617] + def get("▓"), do: [9619] + def get("█"), do: [9608] + def get("=⃥"), do: [61, 8421] + def get("≡⃥"), do: [8801, 8421] + def get("⌐"), do: [8976] + def get("𝕓"), do: [120_147] + def get("⊥"), do: [8869] + def get("⊥"), do: [8869] + def get("⋈"), do: [8904] + def get("╗"), do: [9559] + def get("╔"), do: [9556] + def get("╖"), do: [9558] + def get("╓"), do: [9555] + def get("═"), do: [9552] + def get("╦"), do: [9574] + def get("╩"), do: [9577] + def get("╤"), do: [9572] + def get("╧"), do: [9575] + def get("╝"), do: [9565] + def get("╚"), do: [9562] + def get("╜"), do: [9564] + def get("╙"), do: [9561] + def get("║"), do: [9553] + def get("╬"), do: [9580] + def get("╣"), do: [9571] + def get("╠"), do: [9568] + def get("╫"), do: [9579] + def get("╢"), do: [9570] + def get("╟"), do: [9567] + def get("⧉"), do: [10697] + def get("╕"), do: 
[9557] + def get("╒"), do: [9554] + def get("┐"), do: [9488] + def get("┌"), do: [9484] + def get("─"), do: [9472] + def get("╥"), do: [9573] + def get("╨"), do: [9576] + def get("┬"), do: [9516] + def get("┴"), do: [9524] + def get("⊟"), do: [8863] + def get("⊞"), do: [8862] + def get("⊠"), do: [8864] + def get("╛"), do: [9563] + def get("╘"), do: [9560] + def get("┘"), do: [9496] + def get("└"), do: [9492] + def get("│"), do: [9474] + def get("╪"), do: [9578] + def get("╡"), do: [9569] + def get("╞"), do: [9566] + def get("┼"), do: [9532] + def get("┤"), do: [9508] + def get("├"), do: [9500] + def get("‵"), do: [8245] + def get("˘"), do: [728] + def get("¦"), do: [166] + def get("¦"), do: [166] + def get("𝒷"), do: [119_991] + def get("⁏"), do: [8271] + def get("∽"), do: [8765] + def get("⋍"), do: [8909] + def get("\"), do: [92] + def get("⧅"), do: [10693] + def get("⟈"), do: [10184] + def get("•"), do: [8226] + def get("•"), do: [8226] + def get("≎"), do: [8782] + def get("⪮"), do: [10926] + def get("≏"), do: [8783] + def get("≏"), do: [8783] + def get("ć"), do: [263] + def get("∩"), do: [8745] + def get("⩄"), do: [10820] + def get("⩉"), do: [10825] + def get("⩋"), do: [10827] + def get("⩇"), do: [10823] + def get("⩀"), do: [10816] + def get("∩︀"), do: [8745, 65024] + def get("⁁"), do: [8257] + def get("ˇ"), do: [711] + def get("⩍"), do: [10829] + def get("č"), do: [269] + def get("ç"), do: [231] + def get("ç"), do: [231] + def get("ĉ"), do: [265] + def get("⩌"), do: [10828] + def get("⩐"), do: [10832] + def get("ċ"), do: [267] + def get("¸"), do: [184] + def get("¸"), do: [184] + def get("⦲"), do: [10674] + def get("¢"), do: [162] + def get("¢"), do: [162] + def get("·"), do: [183] + def get("𝔠"), do: [120_096] + def get("ч"), do: [1095] + def get("✓"), do: [10003] + def get("✓"), do: [10003] + def get("χ"), do: [967] + def get("○"), do: [9675] + def get("⧃"), do: [10691] + def get("ˆ"), do: [710] + def get("≗"), do: [8791] + def get("↺"), do: [8634] + def get("↻"), do: [8635] + def get("®"), do: [174] + def get("Ⓢ"), do: [9416] + def get("⊛"), do: [8859] + def get("⊚"), do: [8858] + def get("⊝"), do: [8861] + def get("≗"), do: [8791] + def get("⨐"), do: [10768] + def get("⫯"), do: [10991] + def get("⧂"), do: [10690] + def get("♣"), do: [9827] + def get("♣"), do: [9827] + def get(":"), do: [58] + def get("≔"), do: [8788] + def get("≔"), do: [8788] + def get(","), do: [44] + def get("@"), do: [64] + def get("∁"), do: [8705] + def get("∘"), do: [8728] + def get("∁"), do: [8705] + def get("ℂ"), do: [8450] + def get("≅"), do: [8773] + def get("⩭"), do: [10861] + def get("∮"), do: [8750] + def get("𝕔"), do: [120_148] + def get("∐"), do: [8720] + def get("©"), do: [169] + def get("©"), do: [169] + def get("℗"), do: [8471] + def get("↵"), do: [8629] + def get("✗"), do: [10007] + def get("𝒸"), do: [119_992] + def get("⫏"), do: [10959] + def get("⫑"), do: [10961] + def get("⫐"), do: [10960] + def get("⫒"), do: [10962] + def get("⋯"), do: [8943] + def get("⤸"), do: [10552] + def get("⤵"), do: [10549] + def get("⋞"), do: [8926] + def get("⋟"), do: [8927] + def get("↶"), do: [8630] + def get("⤽"), do: [10557] + def get("∪"), do: [8746] + def get("⩈"), do: [10824] + def get("⩆"), do: [10822] + def get("⩊"), do: [10826] + def get("⊍"), do: [8845] + def get("⩅"), do: [10821] + def get("∪︀"), do: [8746, 65024] + def get("↷"), do: [8631] + def get("⤼"), do: [10556] + def get("⋞"), do: [8926] + def get("⋟"), do: [8927] + def get("⋎"), do: [8910] + def get("⋏"), do: [8911] + def get("¤"), do: [164] + def 
get("¤"), do: [164] + def get("↶"), do: [8630] + def get("↷"), do: [8631] + def get("⋎"), do: [8910] + def get("⋏"), do: [8911] + def get("∲"), do: [8754] + def get("∱"), do: [8753] + def get("⌭"), do: [9005] + def get("⇓"), do: [8659] + def get("⥥"), do: [10597] + def get("†"), do: [8224] + def get("ℸ"), do: [8504] + def get("↓"), do: [8595] + def get("‐"), do: [8208] + def get("⊣"), do: [8867] + def get("⤏"), do: [10511] + def get("˝"), do: [733] + def get("ď"), do: [271] + def get("д"), do: [1076] + def get("ⅆ"), do: [8518] + def get("‡"), do: [8225] + def get("⇊"), do: [8650] + def get("⩷"), do: [10871] + def get("°"), do: [176] + def get("°"), do: [176] + def get("δ"), do: [948] + def get("⦱"), do: [10673] + def get("⥿"), do: [10623] + def get("𝔡"), do: [120_097] + def get("⇃"), do: [8643] + def get("⇂"), do: [8642] + def get("⋄"), do: [8900] + def get("⋄"), do: [8900] + def get("♦"), do: [9830] + def get("♦"), do: [9830] + def get("¨"), do: [168] + def get("ϝ"), do: [989] + def get("⋲"), do: [8946] + def get("÷"), do: [247] + def get("÷"), do: [247] + def get("÷"), do: [247] + def get("⋇"), do: [8903] + def get("⋇"), do: [8903] + def get("ђ"), do: [1106] + def get("⌞"), do: [8990] + def get("⌍"), do: [8973] + def get("$"), do: [36] + def get("𝕕"), do: [120_149] + def get("˙"), do: [729] + def get("≐"), do: [8784] + def get("≑"), do: [8785] + def get("∸"), do: [8760] + def get("∔"), do: [8724] + def get("⊡"), do: [8865] + def get("⌆"), do: [8966] + def get("↓"), do: [8595] + def get("⇊"), do: [8650] + def get("⇃"), do: [8643] + def get("⇂"), do: [8642] + def get("⤐"), do: [10512] + def get("⌟"), do: [8991] + def get("⌌"), do: [8972] + def get("𝒹"), do: [119_993] + def get("ѕ"), do: [1109] + def get("⧶"), do: [10742] + def get("đ"), do: [273] + def get("⋱"), do: [8945] + def get("▿"), do: [9663] + def get("▾"), do: [9662] + def get("⇵"), do: [8693] + def get("⥯"), do: [10607] + def get("⦦"), do: [10662] + def get("џ"), do: [1119] + def get("⟿"), do: [10239] + def get("⩷"), do: [10871] + def get("≑"), do: [8785] + def get("é"), do: [233] + def get("é"), do: [233] + def get("⩮"), do: [10862] + def get("ě"), do: [283] + def get("≖"), do: [8790] + def get("ê"), do: [234] + def get("ê"), do: [234] + def get("≕"), do: [8789] + def get("э"), do: [1101] + def get("ė"), do: [279] + def get("ⅇ"), do: [8519] + def get("≒"), do: [8786] + def get("𝔢"), do: [120_098] + def get("⪚"), do: [10906] + def get("è"), do: [232] + def get("è"), do: [232] + def get("⪖"), do: [10902] + def get("⪘"), do: [10904] + def get("⪙"), do: [10905] + def get("⏧"), do: [9191] + def get("ℓ"), do: [8467] + def get("⪕"), do: [10901] + def get("⪗"), do: [10903] + def get("ē"), do: [275] + def get("∅"), do: [8709] + def get("∅"), do: [8709] + def get("∅"), do: [8709] + def get(" "), do: [8196] + def get(" "), do: [8197] + def get(" "), do: [8195] + def get("ŋ"), do: [331] + def get(" "), do: [8194] + def get("ę"), do: [281] + def get("𝕖"), do: [120_150] + def get("⋕"), do: [8917] + def get("⧣"), do: [10723] + def get("⩱"), do: [10865] + def get("ε"), do: [949] + def get("ε"), do: [949] + def get("ϵ"), do: [1013] + def get("≖"), do: [8790] + def get("≕"), do: [8789] + def get("≂"), do: [8770] + def get("⪖"), do: [10902] + def get("⪕"), do: [10901] + def get("="), do: [61] + def get("≟"), do: [8799] + def get("≡"), do: [8801] + def get("⩸"), do: [10872] + def get("⧥"), do: [10725] + def get("≓"), do: [8787] + def get("⥱"), do: [10609] + def get("ℯ"), do: [8495] + def get("≐"), do: [8784] + def get("≂"), do: [8770] + def 
get("η"), do: [951] + def get("ð"), do: [240] + def get("ð"), do: [240] + def get("ë"), do: [235] + def get("ë"), do: [235] + def get("€"), do: [8364] + def get("!"), do: [33] + def get("∃"), do: [8707] + def get("ℰ"), do: [8496] + def get("ⅇ"), do: [8519] + def get("≒"), do: [8786] + def get("ф"), do: [1092] + def get("♀"), do: [9792] + def get("ffi"), do: [64259] + def get("ff"), do: [64256] + def get("ffl"), do: [64260] + def get("𝔣"), do: [120_099] + def get("fi"), do: [64257] + def get("fj"), do: [102, 106] + def get("♭"), do: [9837] + def get("fl"), do: [64258] + def get("▱"), do: [9649] + def get("ƒ"), do: [402] + def get("𝕗"), do: [120_151] + def get("∀"), do: [8704] + def get("⋔"), do: [8916] + def get("⫙"), do: [10969] + def get("⨍"), do: [10765] + def get("½"), do: [189] + def get("½"), do: [189] + def get("⅓"), do: [8531] + def get("¼"), do: [188] + def get("¼"), do: [188] + def get("⅕"), do: [8533] + def get("⅙"), do: [8537] + def get("⅛"), do: [8539] + def get("⅔"), do: [8532] + def get("⅖"), do: [8534] + def get("¾"), do: [190] + def get("¾"), do: [190] + def get("⅗"), do: [8535] + def get("⅜"), do: [8540] + def get("⅘"), do: [8536] + def get("⅚"), do: [8538] + def get("⅝"), do: [8541] + def get("⅞"), do: [8542] + def get("⁄"), do: [8260] + def get("⌢"), do: [8994] + def get("𝒻"), do: [119_995] + def get("≧"), do: [8807] + def get("⪌"), do: [10892] + def get("ǵ"), do: [501] + def get("γ"), do: [947] + def get("ϝ"), do: [989] + def get("⪆"), do: [10886] + def get("ğ"), do: [287] + def get("ĝ"), do: [285] + def get("г"), do: [1075] + def get("ġ"), do: [289] + def get("≥"), do: [8805] + def get("⋛"), do: [8923] + def get("≥"), do: [8805] + def get("≧"), do: [8807] + def get("⩾"), do: [10878] + def get("⩾"), do: [10878] + def get("⪩"), do: [10921] + def get("⪀"), do: [10880] + def get("⪂"), do: [10882] + def get("⪄"), do: [10884] + def get("⋛︀"), do: [8923, 65024] + def get("⪔"), do: [10900] + def get("𝔤"), do: [120_100] + def get("≫"), do: [8811] + def get("⋙"), do: [8921] + def get("ℷ"), do: [8503] + def get("ѓ"), do: [1107] + def get("≷"), do: [8823] + def get("⪒"), do: [10898] + def get("⪥"), do: [10917] + def get("⪤"), do: [10916] + def get("≩"), do: [8809] + def get("⪊"), do: [10890] + def get("⪊"), do: [10890] + def get("⪈"), do: [10888] + def get("⪈"), do: [10888] + def get("≩"), do: [8809] + def get("⋧"), do: [8935] + def get("𝕘"), do: [120_152] + def get("`"), do: [96] + def get("ℊ"), do: [8458] + def get("≳"), do: [8819] + def get("⪎"), do: [10894] + def get("⪐"), do: [10896] + def get(">"), do: [62] + def get(">"), do: [62] + def get("⪧"), do: [10919] + def get("⩺"), do: [10874] + def get("⋗"), do: [8919] + def get("⦕"), do: [10645] + def get("⩼"), do: [10876] + def get("⪆"), do: [10886] + def get("⥸"), do: [10616] + def get("⋗"), do: [8919] + def get("⋛"), do: [8923] + def get("⪌"), do: [10892] + def get("≷"), do: [8823] + def get("≳"), do: [8819] + def get("≩︀"), do: [8809, 65024] + def get("≩︀"), do: [8809, 65024] + def get("⇔"), do: [8660] + def get(" "), do: [8202] + def get("½"), do: [189] + def get("ℋ"), do: [8459] + def get("ъ"), do: [1098] + def get("↔"), do: [8596] + def get("⥈"), do: [10568] + def get("↭"), do: [8621] + def get("ℏ"), do: [8463] + def get("ĥ"), do: [293] + def get("♥"), do: [9829] + def get("♥"), do: [9829] + def get("…"), do: [8230] + def get("⊹"), do: [8889] + def get("𝔥"), do: [120_101] + def get("⤥"), do: [10533] + def get("⤦"), do: [10534] + def get("⇿"), do: [8703] + def get("∻"), do: [8763] + def get("↩"), do: [8617] + def get("↪"), 
do: [8618] + def get("𝕙"), do: [120_153] + def get("―"), do: [8213] + def get("𝒽"), do: [119_997] + def get("ℏ"), do: [8463] + def get("ħ"), do: [295] + def get("⁃"), do: [8259] + def get("‐"), do: [8208] + def get("í"), do: [237] + def get("í"), do: [237] + def get("⁣"), do: [8291] + def get("î"), do: [238] + def get("î"), do: [238] + def get("и"), do: [1080] + def get("е"), do: [1077] + def get("¡"), do: [161] + def get("¡"), do: [161] + def get("⇔"), do: [8660] + def get("𝔦"), do: [120_102] + def get("ì"), do: [236] + def get("ì"), do: [236] + def get("ⅈ"), do: [8520] + def get("⨌"), do: [10764] + def get("∭"), do: [8749] + def get("⧜"), do: [10716] + def get("℩"), do: [8489] + def get("ij"), do: [307] + def get("ī"), do: [299] + def get("ℑ"), do: [8465] + def get("ℐ"), do: [8464] + def get("ℑ"), do: [8465] + def get("ı"), do: [305] + def get("⊷"), do: [8887] + def get("Ƶ"), do: [437] + def get("∈"), do: [8712] + def get("℅"), do: [8453] + def get("∞"), do: [8734] + def get("⧝"), do: [10717] + def get("ı"), do: [305] + def get("∫"), do: [8747] + def get("⊺"), do: [8890] + def get("ℤ"), do: [8484] + def get("⊺"), do: [8890] + def get("⨗"), do: [10775] + def get("⨼"), do: [10812] + def get("ё"), do: [1105] + def get("į"), do: [303] + def get("𝕚"), do: [120_154] + def get("ι"), do: [953] + def get("⨼"), do: [10812] + def get("¿"), do: [191] + def get("¿"), do: [191] + def get("𝒾"), do: [119_998] + def get("∈"), do: [8712] + def get("⋹"), do: [8953] + def get("⋵"), do: [8949] + def get("⋴"), do: [8948] + def get("⋳"), do: [8947] + def get("∈"), do: [8712] + def get("⁢"), do: [8290] + def get("ĩ"), do: [297] + def get("і"), do: [1110] + def get("ï"), do: [239] + def get("ï"), do: [239] + def get("ĵ"), do: [309] + def get("й"), do: [1081] + def get("𝔧"), do: [120_103] + def get("ȷ"), do: [567] + def get("𝕛"), do: [120_155] + def get("𝒿"), do: [119_999] + def get("ј"), do: [1112] + def get("є"), do: [1108] + def get("κ"), do: [954] + def get("ϰ"), do: [1008] + def get("ķ"), do: [311] + def get("к"), do: [1082] + def get("𝔨"), do: [120_104] + def get("ĸ"), do: [312] + def get("х"), do: [1093] + def get("ќ"), do: [1116] + def get("𝕜"), do: [120_156] + def get("𝓀"), do: [120_000] + def get("⇚"), do: [8666] + def get("⇐"), do: [8656] + def get("⤛"), do: [10523] + def get("⤎"), do: [10510] + def get("≦"), do: [8806] + def get("⪋"), do: [10891] + def get("⥢"), do: [10594] + def get("ĺ"), do: [314] + def get("⦴"), do: [10676] + def get("ℒ"), do: [8466] + def get("λ"), do: [955] + def get("⟨"), do: [10216] + def get("⦑"), do: [10641] + def get("⟨"), do: [10216] + def get("⪅"), do: [10885] + def get("«"), do: [171] + def get("«"), do: [171] + def get("←"), do: [8592] + def get("⇤"), do: [8676] + def get("⤟"), do: [10527] + def get("⤝"), do: [10525] + def get("↩"), do: [8617] + def get("↫"), do: [8619] + def get("⤹"), do: [10553] + def get("⥳"), do: [10611] + def get("↢"), do: [8610] + def get("⪫"), do: [10923] + def get("⤙"), do: [10521] + def get("⪭"), do: [10925] + def get("⪭︀"), do: [10925, 65024] + def get("⤌"), do: [10508] + def get("❲"), do: [10098] + def get("{"), do: [123] + def get("["), do: [91] + def get("⦋"), do: [10635] + def get("⦏"), do: [10639] + def get("⦍"), do: [10637] + def get("ľ"), do: [318] + def get("ļ"), do: [316] + def get("⌈"), do: [8968] + def get("{"), do: [123] + def get("л"), do: [1083] + def get("⤶"), do: [10550] + def get("“"), do: [8220] + def get("„"), do: [8222] + def get("⥧"), do: [10599] + def get("⥋"), do: [10571] + def get("↲"), do: [8626] + def get("≤"), do: 
[8804] + def get("←"), do: [8592] + def get("↢"), do: [8610] + def get("↽"), do: [8637] + def get("↼"), do: [8636] + def get("⇇"), do: [8647] + def get("↔"), do: [8596] + def get("⇆"), do: [8646] + def get("⇋"), do: [8651] + def get("↭"), do: [8621] + def get("⋋"), do: [8907] + def get("⋚"), do: [8922] + def get("≤"), do: [8804] + def get("≦"), do: [8806] + def get("⩽"), do: [10877] + def get("⩽"), do: [10877] + def get("⪨"), do: [10920] + def get("⩿"), do: [10879] + def get("⪁"), do: [10881] + def get("⪃"), do: [10883] + def get("⋚︀"), do: [8922, 65024] + def get("⪓"), do: [10899] + def get("⪅"), do: [10885] + def get("⋖"), do: [8918] + def get("⋚"), do: [8922] + def get("⪋"), do: [10891] + def get("≶"), do: [8822] + def get("≲"), do: [8818] + def get("⥼"), do: [10620] + def get("⌊"), do: [8970] + def get("𝔩"), do: [120_105] + def get("≶"), do: [8822] + def get("⪑"), do: [10897] + def get("↽"), do: [8637] + def get("↼"), do: [8636] + def get("⥪"), do: [10602] + def get("▄"), do: [9604] + def get("љ"), do: [1113] + def get("≪"), do: [8810] + def get("⇇"), do: [8647] + def get("⌞"), do: [8990] + def get("⥫"), do: [10603] + def get("◺"), do: [9722] + def get("ŀ"), do: [320] + def get("⎰"), do: [9136] + def get("⎰"), do: [9136] + def get("≨"), do: [8808] + def get("⪉"), do: [10889] + def get("⪉"), do: [10889] + def get("⪇"), do: [10887] + def get("⪇"), do: [10887] + def get("≨"), do: [8808] + def get("⋦"), do: [8934] + def get("⟬"), do: [10220] + def get("⇽"), do: [8701] + def get("⟦"), do: [10214] + def get("⟵"), do: [10229] + def get("⟷"), do: [10231] + def get("⟼"), do: [10236] + def get("⟶"), do: [10230] + def get("↫"), do: [8619] + def get("↬"), do: [8620] + def get("⦅"), do: [10629] + def get("𝕝"), do: [120_157] + def get("⨭"), do: [10797] + def get("⨴"), do: [10804] + def get("∗"), do: [8727] + def get("_"), do: [95] + def get("◊"), do: [9674] + def get("◊"), do: [9674] + def get("⧫"), do: [10731] + def get("("), do: [40] + def get("⦓"), do: [10643] + def get("⇆"), do: [8646] + def get("⌟"), do: [8991] + def get("⇋"), do: [8651] + def get("⥭"), do: [10605] + def get("‎"), do: [8206] + def get("⊿"), do: [8895] + def get("‹"), do: [8249] + def get("𝓁"), do: [120_001] + def get("↰"), do: [8624] + def get("≲"), do: [8818] + def get("⪍"), do: [10893] + def get("⪏"), do: [10895] + def get("["), do: [91] + def get("‘"), do: [8216] + def get("‚"), do: [8218] + def get("ł"), do: [322] + def get("<"), do: [60] + def get("<"), do: [60] + def get("⪦"), do: [10918] + def get("⩹"), do: [10873] + def get("⋖"), do: [8918] + def get("⋋"), do: [8907] + def get("⋉"), do: [8905] + def get("⥶"), do: [10614] + def get("⩻"), do: [10875] + def get("⦖"), do: [10646] + def get("◃"), do: [9667] + def get("⊴"), do: [8884] + def get("◂"), do: [9666] + def get("⥊"), do: [10570] + def get("⥦"), do: [10598] + def get("≨︀"), do: [8808, 65024] + def get("≨︀"), do: [8808, 65024] + def get("∺"), do: [8762] + def get("¯"), do: [175] + def get("¯"), do: [175] + def get("♂"), do: [9794] + def get("✠"), do: [10016] + def get("✠"), do: [10016] + def get("↦"), do: [8614] + def get("↦"), do: [8614] + def get("↧"), do: [8615] + def get("↤"), do: [8612] + def get("↥"), do: [8613] + def get("▮"), do: [9646] + def get("⨩"), do: [10793] + def get("м"), do: [1084] + def get("—"), do: [8212] + def get("∡"), do: [8737] + def get("𝔪"), do: [120_106] + def get("℧"), do: [8487] + def get("µ"), do: [181] + def get("µ"), do: [181] + def get("∣"), do: [8739] + def get("*"), do: [42] + def get("⫰"), do: [10992] + def get("·"), do: [183] + def 
get("·"), do: [183] + def get("−"), do: [8722] + def get("⊟"), do: [8863] + def get("∸"), do: [8760] + def get("⨪"), do: [10794] + def get("⫛"), do: [10971] + def get("…"), do: [8230] + def get("∓"), do: [8723] + def get("⊧"), do: [8871] + def get("𝕞"), do: [120_158] + def get("∓"), do: [8723] + def get("𝓂"), do: [120_002] + def get("∾"), do: [8766] + def get("μ"), do: [956] + def get("⊸"), do: [8888] + def get("⊸"), do: [8888] + def get("⋙̸"), do: [8921, 824] + def get("≫⃒"), do: [8811, 8402] + def get("≫̸"), do: [8811, 824] + def get("⇍"), do: [8653] + def get("⇎"), do: [8654] + def get("⋘̸"), do: [8920, 824] + def get("≪⃒"), do: [8810, 8402] + def get("≪̸"), do: [8810, 824] + def get("⇏"), do: [8655] + def get("⊯"), do: [8879] + def get("⊮"), do: [8878] + def get("∇"), do: [8711] + def get("ń"), do: [324] + def get("∠⃒"), do: [8736, 8402] + def get("≉"), do: [8777] + def get("⩰̸"), do: [10864, 824] + def get("≋̸"), do: [8779, 824] + def get("ʼn"), do: [329] + def get("≉"), do: [8777] + def get("♮"), do: [9838] + def get("♮"), do: [9838] + def get("ℕ"), do: [8469] + def get(" "), do: [160] + def get(" "), do: [160] + def get("≎̸"), do: [8782, 824] + def get("≏̸"), do: [8783, 824] + def get("⩃"), do: [10819] + def get("ň"), do: [328] + def get("ņ"), do: [326] + def get("≇"), do: [8775] + def get("⩭̸"), do: [10861, 824] + def get("⩂"), do: [10818] + def get("н"), do: [1085] + def get("–"), do: [8211] + def get("≠"), do: [8800] + def get("⇗"), do: [8663] + def get("⤤"), do: [10532] + def get("↗"), do: [8599] + def get("↗"), do: [8599] + def get("≐̸"), do: [8784, 824] + def get("≢"), do: [8802] + def get("⤨"), do: [10536] + def get("≂̸"), do: [8770, 824] + def get("∄"), do: [8708] + def get("∄"), do: [8708] + def get("𝔫"), do: [120_107] + def get("≧̸"), do: [8807, 824] + def get("≱"), do: [8817] + def get("≱"), do: [8817] + def get("≧̸"), do: [8807, 824] + def get("⩾̸"), do: [10878, 824] + def get("⩾̸"), do: [10878, 824] + def get("≵"), do: [8821] + def get("≯"), do: [8815] + def get("≯"), do: [8815] + def get("⇎"), do: [8654] + def get("↮"), do: [8622] + def get("⫲"), do: [10994] + def get("∋"), do: [8715] + def get("⋼"), do: [8956] + def get("⋺"), do: [8954] + def get("∋"), do: [8715] + def get("њ"), do: [1114] + def get("⇍"), do: [8653] + def get("≦̸"), do: [8806, 824] + def get("↚"), do: [8602] + def get("‥"), do: [8229] + def get("≰"), do: [8816] + def get("↚"), do: [8602] + def get("↮"), do: [8622] + def get("≰"), do: [8816] + def get("≦̸"), do: [8806, 824] + def get("⩽̸"), do: [10877, 824] + def get("⩽̸"), do: [10877, 824] + def get("≮"), do: [8814] + def get("≴"), do: [8820] + def get("≮"), do: [8814] + def get("⋪"), do: [8938] + def get("⋬"), do: [8940] + def get("∤"), do: [8740] + def get("𝕟"), do: [120_159] + def get("¬"), do: [172] + def get("¬"), do: [172] + def get("∉"), do: [8713] + def get("⋹̸"), do: [8953, 824] + def get("⋵̸"), do: [8949, 824] + def get("∉"), do: [8713] + def get("⋷"), do: [8951] + def get("⋶"), do: [8950] + def get("∌"), do: [8716] + def get("∌"), do: [8716] + def get("⋾"), do: [8958] + def get("⋽"), do: [8957] + def get("∦"), do: [8742] + def get("∦"), do: [8742] + def get("⫽⃥"), do: [11005, 8421] + def get("∂̸"), do: [8706, 824] + def get("⨔"), do: [10772] + def get("⊀"), do: [8832] + def get("⋠"), do: [8928] + def get("⪯̸"), do: [10927, 824] + def get("⊀"), do: [8832] + def get("⪯̸"), do: [10927, 824] + def get("⇏"), do: [8655] + def get("↛"), do: [8603] + def get("⤳̸"), do: [10547, 824] + def get("↝̸"), do: [8605, 824] + def get("↛"), do: [8603] + def 
get("⋫"), do: [8939] + def get("⋭"), do: [8941] + def get("⊁"), do: [8833] + def get("⋡"), do: [8929] + def get("⪰̸"), do: [10928, 824] + def get("𝓃"), do: [120_003] + def get("∤"), do: [8740] + def get("∦"), do: [8742] + def get("≁"), do: [8769] + def get("≄"), do: [8772] + def get("≄"), do: [8772] + def get("∤"), do: [8740] + def get("∦"), do: [8742] + def get("⋢"), do: [8930] + def get("⋣"), do: [8931] + def get("⊄"), do: [8836] + def get("⫅̸"), do: [10949, 824] + def get("⊈"), do: [8840] + def get("⊂⃒"), do: [8834, 8402] + def get("⊈"), do: [8840] + def get("⫅̸"), do: [10949, 824] + def get("⊁"), do: [8833] + def get("⪰̸"), do: [10928, 824] + def get("⊅"), do: [8837] + def get("⫆̸"), do: [10950, 824] + def get("⊉"), do: [8841] + def get("⊃⃒"), do: [8835, 8402] + def get("⊉"), do: [8841] + def get("⫆̸"), do: [10950, 824] + def get("≹"), do: [8825] + def get("ñ"), do: [241] + def get("ñ"), do: [241] + def get("≸"), do: [8824] + def get("⋪"), do: [8938] + def get("⋬"), do: [8940] + def get("⋫"), do: [8939] + def get("⋭"), do: [8941] + def get("ν"), do: [957] + def get("#"), do: [35] + def get("№"), do: [8470] + def get(" "), do: [8199] + def get("⊭"), do: [8877] + def get("⤄"), do: [10500] + def get("≍⃒"), do: [8781, 8402] + def get("⊬"), do: [8876] + def get("≥⃒"), do: [8805, 8402] + def get(">⃒"), do: [62, 8402] + def get("⧞"), do: [10718] + def get("⤂"), do: [10498] + def get("≤⃒"), do: [8804, 8402] + def get("<⃒"), do: [60, 8402] + def get("⊴⃒"), do: [8884, 8402] + def get("⤃"), do: [10499] + def get("⊵⃒"), do: [8885, 8402] + def get("∼⃒"), do: [8764, 8402] + def get("⇖"), do: [8662] + def get("⤣"), do: [10531] + def get("↖"), do: [8598] + def get("↖"), do: [8598] + def get("⤧"), do: [10535] + def get("Ⓢ"), do: [9416] + def get("ó"), do: [243] + def get("ó"), do: [243] + def get("⊛"), do: [8859] + def get("⊚"), do: [8858] + def get("ô"), do: [244] + def get("ô"), do: [244] + def get("о"), do: [1086] + def get("⊝"), do: [8861] + def get("ő"), do: [337] + def get("⨸"), do: [10808] + def get("⊙"), do: [8857] + def get("⦼"), do: [10684] + def get("œ"), do: [339] + def get("⦿"), do: [10687] + def get("𝔬"), do: [120_108] + def get("˛"), do: [731] + def get("ò"), do: [242] + def get("ò"), do: [242] + def get("⧁"), do: [10689] + def get("⦵"), do: [10677] + def get("Ω"), do: [937] + def get("∮"), do: [8750] + def get("↺"), do: [8634] + def get("⦾"), do: [10686] + def get("⦻"), do: [10683] + def get("‾"), do: [8254] + def get("⧀"), do: [10688] + def get("ō"), do: [333] + def get("ω"), do: [969] + def get("ο"), do: [959] + def get("⦶"), do: [10678] + def get("⊖"), do: [8854] + def get("𝕠"), do: [120_160] + def get("⦷"), do: [10679] + def get("⦹"), do: [10681] + def get("⊕"), do: [8853] + def get("∨"), do: [8744] + def get("↻"), do: [8635] + def get("⩝"), do: [10845] + def get("ℴ"), do: [8500] + def get("ℴ"), do: [8500] + def get("ª"), do: [170] + def get("ª"), do: [170] + def get("º"), do: [186] + def get("º"), do: [186] + def get("⊶"), do: [8886] + def get("⩖"), do: [10838] + def get("⩗"), do: [10839] + def get("⩛"), do: [10843] + def get("ℴ"), do: [8500] + def get("ø"), do: [248] + def get("ø"), do: [248] + def get("⊘"), do: [8856] + def get("õ"), do: [245] + def get("õ"), do: [245] + def get("⊗"), do: [8855] + def get("⨶"), do: [10806] + def get("ö"), do: [246] + def get("ö"), do: [246] + def get("⌽"), do: [9021] + def get("∥"), do: [8741] + def get("¶"), do: [182] + def get("¶"), do: [182] + def get("∥"), do: [8741] + def get("⫳"), do: [10995] + def get("⫽"), do: [11005] + def get("∂"), do: 
[8706] + def get("п"), do: [1087] + def get("%"), do: [37] + def get("."), do: [46] + def get("‰"), do: [8240] + def get("⊥"), do: [8869] + def get("‱"), do: [8241] + def get("𝔭"), do: [120_109] + def get("φ"), do: [966] + def get("ϕ"), do: [981] + def get("ℳ"), do: [8499] + def get("☎"), do: [9742] + def get("π"), do: [960] + def get("⋔"), do: [8916] + def get("ϖ"), do: [982] + def get("ℏ"), do: [8463] + def get("ℎ"), do: [8462] + def get("ℏ"), do: [8463] + def get("+"), do: [43] + def get("⨣"), do: [10787] + def get("⊞"), do: [8862] + def get("⨢"), do: [10786] + def get("∔"), do: [8724] + def get("⨥"), do: [10789] + def get("⩲"), do: [10866] + def get("±"), do: [177] + def get("±"), do: [177] + def get("⨦"), do: [10790] + def get("⨧"), do: [10791] + def get("±"), do: [177] + def get("⨕"), do: [10773] + def get("𝕡"), do: [120_161] + def get("£"), do: [163] + def get("£"), do: [163] + def get("≺"), do: [8826] + def get("⪳"), do: [10931] + def get("⪷"), do: [10935] + def get("≼"), do: [8828] + def get("⪯"), do: [10927] + def get("≺"), do: [8826] + def get("⪷"), do: [10935] + def get("≼"), do: [8828] + def get("⪯"), do: [10927] + def get("⪹"), do: [10937] + def get("⪵"), do: [10933] + def get("⋨"), do: [8936] + def get("≾"), do: [8830] + def get("′"), do: [8242] + def get("ℙ"), do: [8473] + def get("⪵"), do: [10933] + def get("⪹"), do: [10937] + def get("⋨"), do: [8936] + def get("∏"), do: [8719] + def get("⌮"), do: [9006] + def get("⌒"), do: [8978] + def get("⌓"), do: [8979] + def get("∝"), do: [8733] + def get("∝"), do: [8733] + def get("≾"), do: [8830] + def get("⊰"), do: [8880] + def get("𝓅"), do: [120_005] + def get("ψ"), do: [968] + def get(" "), do: [8200] + def get("𝔮"), do: [120_110] + def get("⨌"), do: [10764] + def get("𝕢"), do: [120_162] + def get("⁗"), do: [8279] + def get("𝓆"), do: [120_006] + def get("ℍ"), do: [8461] + def get("⨖"), do: [10774] + def get("?"), do: [63] + def get("≟"), do: [8799] + def get("""), do: [34] + def get("""), do: [34] + def get("⇛"), do: [8667] + def get("⇒"), do: [8658] + def get("⤜"), do: [10524] + def get("⤏"), do: [10511] + def get("⥤"), do: [10596] + def get("∽̱"), do: [8765, 817] + def get("ŕ"), do: [341] + def get("√"), do: [8730] + def get("⦳"), do: [10675] + def get("⟩"), do: [10217] + def get("⦒"), do: [10642] + def get("⦥"), do: [10661] + def get("⟩"), do: [10217] + def get("»"), do: [187] + def get("»"), do: [187] + def get("→"), do: [8594] + def get("⥵"), do: [10613] + def get("⇥"), do: [8677] + def get("⤠"), do: [10528] + def get("⤳"), do: [10547] + def get("⤞"), do: [10526] + def get("↪"), do: [8618] + def get("↬"), do: [8620] + def get("⥅"), do: [10565] + def get("⥴"), do: [10612] + def get("↣"), do: [8611] + def get("↝"), do: [8605] + def get("⤚"), do: [10522] + def get("∶"), do: [8758] + def get("ℚ"), do: [8474] + def get("⤍"), do: [10509] + def get("❳"), do: [10099] + def get("}"), do: [125] + def get("]"), do: [93] + def get("⦌"), do: [10636] + def get("⦎"), do: [10638] + def get("⦐"), do: [10640] + def get("ř"), do: [345] + def get("ŗ"), do: [343] + def get("⌉"), do: [8969] + def get("}"), do: [125] + def get("р"), do: [1088] + def get("⤷"), do: [10551] + def get("⥩"), do: [10601] + def get("”"), do: [8221] + def get("”"), do: [8221] + def get("↳"), do: [8627] + def get("ℜ"), do: [8476] + def get("ℛ"), do: [8475] + def get("ℜ"), do: [8476] + def get("ℝ"), do: [8477] + def get("▭"), do: [9645] + def get("®"), do: [174] + def get("®"), do: [174] + def get("⥽"), do: [10621] + def get("⌋"), do: [8971] + def get("𝔯"), do: [120_111] + 
def get("⇁"), do: [8641] + def get("⇀"), do: [8640] + def get("⥬"), do: [10604] + def get("ρ"), do: [961] + def get("ϱ"), do: [1009] + def get("→"), do: [8594] + def get("↣"), do: [8611] + def get("⇁"), do: [8641] + def get("⇀"), do: [8640] + def get("⇄"), do: [8644] + def get("⇌"), do: [8652] + def get("⇉"), do: [8649] + def get("↝"), do: [8605] + def get("⋌"), do: [8908] + def get("˚"), do: [730] + def get("≓"), do: [8787] + def get("⇄"), do: [8644] + def get("⇌"), do: [8652] + def get("‏"), do: [8207] + def get("⎱"), do: [9137] + def get("⎱"), do: [9137] + def get("⫮"), do: [10990] + def get("⟭"), do: [10221] + def get("⇾"), do: [8702] + def get("⟧"), do: [10215] + def get("⦆"), do: [10630] + def get("𝕣"), do: [120_163] + def get("⨮"), do: [10798] + def get("⨵"), do: [10805] + def get(")"), do: [41] + def get("⦔"), do: [10644] + def get("⨒"), do: [10770] + def get("⇉"), do: [8649] + def get("›"), do: [8250] + def get("𝓇"), do: [120_007] + def get("↱"), do: [8625] + def get("]"), do: [93] + def get("’"), do: [8217] + def get("’"), do: [8217] + def get("⋌"), do: [8908] + def get("⋊"), do: [8906] + def get("▹"), do: [9657] + def get("⊵"), do: [8885] + def get("▸"), do: [9656] + def get("⧎"), do: [10702] + def get("⥨"), do: [10600] + def get("℞"), do: [8478] + def get("ś"), do: [347] + def get("‚"), do: [8218] + def get("≻"), do: [8827] + def get("⪴"), do: [10932] + def get("⪸"), do: [10936] + def get("š"), do: [353] + def get("≽"), do: [8829] + def get("⪰"), do: [10928] + def get("ş"), do: [351] + def get("ŝ"), do: [349] + def get("⪶"), do: [10934] + def get("⪺"), do: [10938] + def get("⋩"), do: [8937] + def get("⨓"), do: [10771] + def get("≿"), do: [8831] + def get("с"), do: [1089] + def get("⋅"), do: [8901] + def get("⊡"), do: [8865] + def get("⩦"), do: [10854] + def get("⇘"), do: [8664] + def get("⤥"), do: [10533] + def get("↘"), do: [8600] + def get("↘"), do: [8600] + def get("§"), do: [167] + def get("§"), do: [167] + def get(";"), do: [59] + def get("⤩"), do: [10537] + def get("∖"), do: [8726] + def get("∖"), do: [8726] + def get("✶"), do: [10038] + def get("𝔰"), do: [120_112] + def get("⌢"), do: [8994] + def get("♯"), do: [9839] + def get("щ"), do: [1097] + def get("ш"), do: [1096] + def get("∣"), do: [8739] + def get("∥"), do: [8741] + def get("­"), do: [173] + def get("­"), do: [173] + def get("σ"), do: [963] + def get("ς"), do: [962] + def get("ς"), do: [962] + def get("∼"), do: [8764] + def get("⩪"), do: [10858] + def get("≃"), do: [8771] + def get("≃"), do: [8771] + def get("⪞"), do: [10910] + def get("⪠"), do: [10912] + def get("⪝"), do: [10909] + def get("⪟"), do: [10911] + def get("≆"), do: [8774] + def get("⨤"), do: [10788] + def get("⥲"), do: [10610] + def get("←"), do: [8592] + def get("∖"), do: [8726] + def get("⨳"), do: [10803] + def get("⧤"), do: [10724] + def get("∣"), do: [8739] + def get("⌣"), do: [8995] + def get("⪪"), do: [10922] + def get("⪬"), do: [10924] + def get("⪬︀"), do: [10924, 65024] + def get("ь"), do: [1100] + def get("/"), do: [47] + def get("⧄"), do: [10692] + def get("⌿"), do: [9023] + def get("𝕤"), do: [120_164] + def get("♠"), do: [9824] + def get("♠"), do: [9824] + def get("∥"), do: [8741] + def get("⊓"), do: [8851] + def get("⊓︀"), do: [8851, 65024] + def get("⊔"), do: [8852] + def get("⊔︀"), do: [8852, 65024] + def get("⊏"), do: [8847] + def get("⊑"), do: [8849] + def get("⊏"), do: [8847] + def get("⊑"), do: [8849] + def get("⊐"), do: [8848] + def get("⊒"), do: [8850] + def get("⊐"), do: [8848] + def get("⊒"), do: [8850] + def get("□"), do: 
[9633] + def get("□"), do: [9633] + def get("▪"), do: [9642] + def get("▪"), do: [9642] + def get("→"), do: [8594] + def get("𝓈"), do: [120_008] + def get("∖"), do: [8726] + def get("⌣"), do: [8995] + def get("⋆"), do: [8902] + def get("☆"), do: [9734] + def get("★"), do: [9733] + def get("ϵ"), do: [1013] + def get("ϕ"), do: [981] + def get("¯"), do: [175] + def get("⊂"), do: [8834] + def get("⫅"), do: [10949] + def get("⪽"), do: [10941] + def get("⊆"), do: [8838] + def get("⫃"), do: [10947] + def get("⫁"), do: [10945] + def get("⫋"), do: [10955] + def get("⊊"), do: [8842] + def get("⪿"), do: [10943] + def get("⥹"), do: [10617] + def get("⊂"), do: [8834] + def get("⊆"), do: [8838] + def get("⫅"), do: [10949] + def get("⊊"), do: [8842] + def get("⫋"), do: [10955] + def get("⫇"), do: [10951] + def get("⫕"), do: [10965] + def get("⫓"), do: [10963] + def get("≻"), do: [8827] + def get("⪸"), do: [10936] + def get("≽"), do: [8829] + def get("⪰"), do: [10928] + def get("⪺"), do: [10938] + def get("⪶"), do: [10934] + def get("⋩"), do: [8937] + def get("≿"), do: [8831] + def get("∑"), do: [8721] + def get("♪"), do: [9834] + def get("¹"), do: [185] + def get("¹"), do: [185] + def get("²"), do: [178] + def get("²"), do: [178] + def get("³"), do: [179] + def get("³"), do: [179] + def get("⊃"), do: [8835] + def get("⫆"), do: [10950] + def get("⪾"), do: [10942] + def get("⫘"), do: [10968] + def get("⊇"), do: [8839] + def get("⫄"), do: [10948] + def get("⟉"), do: [10185] + def get("⫗"), do: [10967] + def get("⥻"), do: [10619] + def get("⫂"), do: [10946] + def get("⫌"), do: [10956] + def get("⊋"), do: [8843] + def get("⫀"), do: [10944] + def get("⊃"), do: [8835] + def get("⊇"), do: [8839] + def get("⫆"), do: [10950] + def get("⊋"), do: [8843] + def get("⫌"), do: [10956] + def get("⫈"), do: [10952] + def get("⫔"), do: [10964] + def get("⫖"), do: [10966] + def get("⇙"), do: [8665] + def get("⤦"), do: [10534] + def get("↙"), do: [8601] + def get("↙"), do: [8601] + def get("⤪"), do: [10538] + def get("ß"), do: [223] + def get("ß"), do: [223] + def get("⌖"), do: [8982] + def get("τ"), do: [964] + def get("⎴"), do: [9140] + def get("ť"), do: [357] + def get("ţ"), do: [355] + def get("т"), do: [1090] + def get("⃛"), do: [8411] + def get("⌕"), do: [8981] + def get("𝔱"), do: [120_113] + def get("∴"), do: [8756] + def get("∴"), do: [8756] + def get("θ"), do: [952] + def get("ϑ"), do: [977] + def get("ϑ"), do: [977] + def get("≈"), do: [8776] + def get("∼"), do: [8764] + def get(" "), do: [8201] + def get("≈"), do: [8776] + def get("∼"), do: [8764] + def get("þ"), do: [254] + def get("þ"), do: [254] + def get("˜"), do: [732] + def get("×"), do: [215] + def get("×"), do: [215] + def get("⊠"), do: [8864] + def get("⨱"), do: [10801] + def get("⨰"), do: [10800] + def get("∭"), do: [8749] + def get("⤨"), do: [10536] + def get("⊤"), do: [8868] + def get("⌶"), do: [9014] + def get("⫱"), do: [10993] + def get("𝕥"), do: [120_165] + def get("⫚"), do: [10970] + def get("⤩"), do: [10537] + def get("‴"), do: [8244] + def get("™"), do: [8482] + def get("▵"), do: [9653] + def get("▿"), do: [9663] + def get("◃"), do: [9667] + def get("⊴"), do: [8884] + def get("≜"), do: [8796] + def get("▹"), do: [9657] + def get("⊵"), do: [8885] + def get("◬"), do: [9708] + def get("≜"), do: [8796] + def get("⨺"), do: [10810] + def get("⨹"), do: [10809] + def get("⧍"), do: [10701] + def get("⨻"), do: [10811] + def get("⏢"), do: [9186] + def get("𝓉"), do: [120_009] + def get("ц"), do: [1094] + def get("ћ"), do: [1115] + def get("ŧ"), do: [359] + 
def get("≬"), do: [8812] + def get("↞"), do: [8606] + def get("↠"), do: [8608] + def get("⇑"), do: [8657] + def get("⥣"), do: [10595] + def get("ú"), do: [250] + def get("ú"), do: [250] + def get("↑"), do: [8593] + def get("ў"), do: [1118] + def get("ŭ"), do: [365] + def get("û"), do: [251] + def get("û"), do: [251] + def get("у"), do: [1091] + def get("⇅"), do: [8645] + def get("ű"), do: [369] + def get("⥮"), do: [10606] + def get("⥾"), do: [10622] + def get("𝔲"), do: [120_114] + def get("ù"), do: [249] + def get("ù"), do: [249] + def get("↿"), do: [8639] + def get("↾"), do: [8638] + def get("▀"), do: [9600] + def get("⌜"), do: [8988] + def get("⌜"), do: [8988] + def get("⌏"), do: [8975] + def get("◸"), do: [9720] + def get("ū"), do: [363] + def get("¨"), do: [168] + def get("¨"), do: [168] + def get("ų"), do: [371] + def get("𝕦"), do: [120_166] + def get("↑"), do: [8593] + def get("↕"), do: [8597] + def get("↿"), do: [8639] + def get("↾"), do: [8638] + def get("⊎"), do: [8846] + def get("υ"), do: [965] + def get("ϒ"), do: [978] + def get("υ"), do: [965] + def get("⇈"), do: [8648] + def get("⌝"), do: [8989] + def get("⌝"), do: [8989] + def get("⌎"), do: [8974] + def get("ů"), do: [367] + def get("◹"), do: [9721] + def get("𝓊"), do: [120_010] + def get("⋰"), do: [8944] + def get("ũ"), do: [361] + def get("▵"), do: [9653] + def get("▴"), do: [9652] + def get("⇈"), do: [8648] + def get("ü"), do: [252] + def get("ü"), do: [252] + def get("⦧"), do: [10663] + def get("⇕"), do: [8661] + def get("⫨"), do: [10984] + def get("⫩"), do: [10985] + def get("⊨"), do: [8872] + def get("⦜"), do: [10652] + def get("ϵ"), do: [1013] + def get("ϰ"), do: [1008] + def get("∅"), do: [8709] + def get("ϕ"), do: [981] + def get("ϖ"), do: [982] + def get("∝"), do: [8733] + def get("↕"), do: [8597] + def get("ϱ"), do: [1009] + def get("ς"), do: [962] + def get("⊊︀"), do: [8842, 65024] + def get("⫋︀"), do: [10955, 65024] + def get("⊋︀"), do: [8843, 65024] + def get("⫌︀"), do: [10956, 65024] + def get("ϑ"), do: [977] + def get("⊲"), do: [8882] + def get("⊳"), do: [8883] + def get("в"), do: [1074] + def get("⊢"), do: [8866] + def get("∨"), do: [8744] + def get("⊻"), do: [8891] + def get("≚"), do: [8794] + def get("⋮"), do: [8942] + def get("|"), do: [124] + def get("|"), do: [124] + def get("𝔳"), do: [120_115] + def get("⊲"), do: [8882] + def get("⊂⃒"), do: [8834, 8402] + def get("⊃⃒"), do: [8835, 8402] + def get("𝕧"), do: [120_167] + def get("∝"), do: [8733] + def get("⊳"), do: [8883] + def get("𝓋"), do: [120_011] + def get("⫋︀"), do: [10955, 65024] + def get("⊊︀"), do: [8842, 65024] + def get("⫌︀"), do: [10956, 65024] + def get("⊋︀"), do: [8843, 65024] + def get("⦚"), do: [10650] + def get("ŵ"), do: [373] + def get("⩟"), do: [10847] + def get("∧"), do: [8743] + def get("≙"), do: [8793] + def get("℘"), do: [8472] + def get("𝔴"), do: [120_116] + def get("𝕨"), do: [120_168] + def get("℘"), do: [8472] + def get("≀"), do: [8768] + def get("≀"), do: [8768] + def get("𝓌"), do: [120_012] + def get("⋂"), do: [8898] + def get("◯"), do: [9711] + def get("⋃"), do: [8899] + def get("▽"), do: [9661] + def get("𝔵"), do: [120_117] + def get("⟺"), do: [10234] + def get("⟷"), do: [10231] + def get("ξ"), do: [958] + def get("⟸"), do: [10232] + def get("⟵"), do: [10229] + def get("⟼"), do: [10236] + def get("⋻"), do: [8955] + def get("⨀"), do: [10752] + def get("𝕩"), do: [120_169] + def get("⨁"), do: [10753] + def get("⨂"), do: [10754] + def get("⟹"), do: [10233] + def get("⟶"), do: [10230] + def get("𝓍"), do: [120_013] + def get("⨆"), 
do: [10758] + def get("⨄"), do: [10756] + def get("△"), do: [9651] + def get("⋁"), do: [8897] + def get("⋀"), do: [8896] + def get("ý"), do: [253] + def get("ý"), do: [253] + def get("я"), do: [1103] + def get("ŷ"), do: [375] + def get("ы"), do: [1099] + def get("¥"), do: [165] + def get("¥"), do: [165] + def get("𝔶"), do: [120_118] + def get("ї"), do: [1111] + def get("𝕪"), do: [120_170] + def get("𝓎"), do: [120_014] + def get("ю"), do: [1102] + def get("ÿ"), do: [255] + def get("ÿ"), do: [255] + def get("ź"), do: [378] + def get("ž"), do: [382] + def get("з"), do: [1079] + def get("ż"), do: [380] + def get("ℨ"), do: [8488] + def get("ζ"), do: [950] + def get("𝔷"), do: [120_119] + def get("ж"), do: [1078] + def get("⇝"), do: [8669] + def get("𝕫"), do: [120_171] + def get("𝓏"), do: [120_015] + def get("‍"), do: [8205] + def get("‌"), do: [8204] + def get(_), do: [] +end diff --git a/lib/floki/html/numeric_charref.ex b/lib/floki/html/numeric_charref.ex new file mode 100644 index 00000000..4e6da461 --- /dev/null +++ b/lib/floki/html/numeric_charref.ex @@ -0,0 +1,108 @@ +defmodule Floki.HTML.NumericCharref do + @moduledoc false + + # REPLACEMENT CHARACTER + def to_unicode_number(0x00), do: {:ok, {:table, 0xFFFD}} + # EURO SIGN (€) + def to_unicode_number(0x80), do: {:ok, {:table, 0x20AC}} + # SINGLE LOW-9 QUOTATION MARK (‚) + def to_unicode_number(0x82), do: {:ok, {:table, 0x201A}} + # LATIN SMALL LETTER F WITH HOOK (ƒ) + def to_unicode_number(0x83), do: {:ok, {:table, 0x0192}} + # DOUBLE LOW-9 QUOTATION MARK („) + def to_unicode_number(0x84), do: {:ok, {:table, 0x201E}} + # HORIZONTAL ELLIPSIS (…) + def to_unicode_number(0x85), do: {:ok, {:table, 0x2026}} + # DAGGER (†) + def to_unicode_number(0x86), do: {:ok, {:table, 0x2020}} + # DOUBLE DAGGER (‡) + def to_unicode_number(0x87), do: {:ok, {:table, 0x2021}} + # MODIFIER LETTER CIRCUMFLEX ACCENT (ˆ) + def to_unicode_number(0x88), do: {:ok, {:table, 0x02C6}} + # PER MILLE SIGN (‰) + def to_unicode_number(0x89), do: {:ok, {:table, 0x2030}} + # LATIN CAPITAL LETTER S WITH CARON (Š) + def to_unicode_number(0x8A), do: {:ok, {:table, 0x0160}} + # SINGLE LEFT-POINTING ANGLE QUOTATION MARK (‹) + def to_unicode_number(0x8B), do: {:ok, {:table, 0x2039}} + # LATIN CAPITAL LIGATURE OE (Œ) + def to_unicode_number(0x8C), do: {:ok, {:table, 0x0152}} + # LATIN CAPITAL LETTER Z WITH CARON (Ž) + def to_unicode_number(0x8E), do: {:ok, {:table, 0x017D}} + # LEFT SINGLE QUOTATION MARK (‘) + def to_unicode_number(0x91), do: {:ok, {:table, 0x2018}} + # RIGHT SINGLE QUOTATION MARK (’) + def to_unicode_number(0x92), do: {:ok, {:table, 0x2019}} + # LEFT DOUBLE QUOTATION MARK (“) + def to_unicode_number(0x93), do: {:ok, {:table, 0x201C}} + # RIGHT DOUBLE QUOTATION MARK (”) + def to_unicode_number(0x94), do: {:ok, {:table, 0x201D}} + # BULLET (•) + def to_unicode_number(0x95), do: {:ok, {:table, 0x2022}} + # EN DASH (–) + def to_unicode_number(0x96), do: {:ok, {:table, 0x2013}} + # EM DASH (—) + def to_unicode_number(0x97), do: {:ok, {:table, 0x2014}} + # SMALL TILDE (˜) + def to_unicode_number(0x98), do: {:ok, {:table, 0x02DC}} + # TRADE MARK SIGN (™) + def to_unicode_number(0x99), do: {:ok, {:table, 0x2122}} + # LATIN SMALL LETTER S WITH CARON (š) + def to_unicode_number(0x9A), do: {:ok, {:table, 0x0161}} + # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (›) + def to_unicode_number(0x9B), do: {:ok, {:table, 0x203A}} + # LATIN SMALL LIGATURE OE (œ) + def to_unicode_number(0x9C), do: {:ok, {:table, 0x0153}} + # LATIN SMALL LETTER Z WITH CARON (ž) + def 
to_unicode_number(0x9E), do: {:ok, {:table, 0x017E}} + # LATIN CAPITAL LETTER Y WITH DIAERESIS (Ÿ) + def to_unicode_number(0x9F), do: {:ok, {:table, 0x0178}} + + def to_unicode_number(number) when number in 0xD800..0xDFFF or number > 0x10FFFF, + do: {:ok, {:range_one, 0xFFFD}} + + def to_unicode_number(number) + when number in 0x0001..0x0008 or number in 0x000D..0x001F or number in 0x007F..0x009F or + number in 0xFDD0..0xFDEF or + number in [ + 0x000B, + 0xFFFE, + 0xFFFF, + 0x1FFFE, + 0x1FFFF, + 0x2FFFE, + 0x2FFFF, + 0x3FFFE, + 0x3FFFF, + 0x4FFFE, + 0x4FFFF, + 0x5FFFE, + 0x5FFFF, + 0x6FFFE, + 0x6FFFF, + 0x7FFFE, + 0x7FFFF, + 0x8FFFE, + 0x8FFFF, + 0x9FFFE, + 0x9FFFF, + 0xAFFFE, + 0xAFFFF, + 0xBFFFE, + 0xBFFFF, + 0xCFFFE, + 0xCFFFF, + 0xDFFFE, + 0xDFFFF, + 0xEFFFE, + 0xEFFFF, + 0xFFFFE, + 0xFFFFF, + 0x10FFFE, + 0x10FFFF + ] do + {:ok, {:list_of_errors, number}} + end + + def to_unicode_number(number), do: {:ok, {:unicode, number}} +end diff --git a/lib/floki/html/tokenizer.ex b/lib/floki/html/tokenizer.ex new file mode 100644 index 00000000..d9676510 --- /dev/null +++ b/lib/floki/html/tokenizer.ex @@ -0,0 +1,2867 @@ +defmodule Floki.HTML.Tokenizer do + @moduledoc false + + # HTML tokenizer built according to the specs of WHATWG/W3C. + # https://html.spec.whatwg.org/multipage/#toc-syntax + # + # In order to find the docs of a given state, add it as an anchor to the link above. + # Example: https://html.spec.whatwg.org/multipage/parsing.html#data-state + # + # The tests for this module can be found in test/floki/html/generated/tokenizer. + # They were generated based on test files from https://github.com/html5lib/html5lib-tests + # In order to update those test files you first need to run the task: + # + # mix generate_tokenizer_tests filename.tests + # + # Where "filename.tests" is a file present in "test/html5lib-tests/tokenizer" directory. + # + # This tokenizer depends on an entities list that is generated with another mix task. + # That file shouldn't change much, but if needed, it can be updated with: + # + # mix generate_entities + # + # This tokenizer does not work with streams yet. + + defmodule Doctype do + @moduledoc false + + defstruct name: nil, + public_id: nil, + system_id: nil, + force_quirks: :off + + @type t :: %__MODULE__{ + name: iodata(), + public_id: iodata() | nil, + system_id: iodata() | nil, + force_quirks: :on | :off + } + end + + defmodule Attribute do + @moduledoc false + + defstruct name: "", value: "" + + @type t :: %__MODULE__{ + name: iodata(), + value: iodata() + } + end + + defmodule StartTag do + @moduledoc false + + defstruct name: "", + self_close: nil, + attributes: [] + + @type t :: %__MODULE__{ + name: iodata(), + self_close: boolean() | nil, + attributes: list(Attribute.t()) + } + end + + defmodule EndTag do + @moduledoc false + + defstruct name: "", + self_close: nil, + attributes: [] + + @type t :: %__MODULE__{ + name: iodata(), + self_close: boolean() | nil, + attributes: list(Attribute.t()) + } + end + + defmodule Comment do + @moduledoc false + + defstruct data: "" + + @type t :: %__MODULE__{ + data: iodata() + } + end + + defmodule CharrefState do + @moduledoc false + + defstruct candidate: nil, done: false, length: 0 + + @type t :: %__MODULE__{ + candidate: binary(), + done: boolean(), + length: integer() + } + end + + # It represents the state of tokenization. 
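+  # Illustrative usage sketch (not part of the original diff): calling
+  # Floki.HTML.Tokenizer.tokenize(~s(<p class="a">hi</p>)) returns this struct
+  # with :tokens holding roughly a %StartTag{name: "p", attributes: [...]},
+  # the {:char, _} tokens for "hi", an %EndTag{name: "p"} and a trailing :eof,
+  # while :errors accumulates any {:parse_error, _} tuples found along the way.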
+ defmodule State do + @moduledoc false + + defstruct return_state: nil, + eof_last_state: nil, + adjusted_current_node: nil, + token: nil, + tokens: [], + buffer: "", + last_start_tag: nil, + errors: [], + emit: nil, + charref_state: nil, + charref_code: nil + + @type token :: Doctype.t() | Comment.t() | StartTag.t() | EndTag.t() | {:char, iodata()} + + @type t :: %__MODULE__{ + return_state: + :data + | :rcdata + | :attribute_value_double_quoted + | :attribute_value_single_quoted + | :attribute_value_unquoted, + eof_last_state: atom(), + buffer: iodata(), + token: token() | nil, + tokens: list(token()), + errors: [{:parse_error, binary() | nil}], + last_start_tag: StartTag.t(), + charref_state: CharrefState.t(), + charref_code: integer(), + emit: (token() -> token()) + } + end + + @lower_ASCII_letters ?a..?z + @upper_ASCII_letters ?A..?Z + @ascii_digits ?0..?9 + @space_chars [?\t, ?\n, ?\f, ?\s] + + defguardp is_lower_letter(c) when c in @lower_ASCII_letters + defguardp is_upper_letter(c) when c in @upper_ASCII_letters + defguardp is_digit(c) when c in @ascii_digits + defguardp is_letter(c) when c in @upper_ASCII_letters or c in @lower_ASCII_letters + defguardp is_space(c) when c in @space_chars + + @less_than_sign ?< + @greater_than_sign ?> + @exclamation_mark ?! + @solidus ?/ + @hyphen_minus ?- + @replacement_char 0xFFFD + + @spec tokenize(binary()) :: State.t() + def tokenize(html) do + pattern = :binary.compile_pattern(["\r\n", "\r"]) + + html + |> String.replace(pattern, "\n") + |> data(%State{emit: fn token -> token end}) + end + + # It assumes that the parser stops at the end of file. + # If we need to work with streams, this can't reverse here. + defp eof(last_state, s) do + %{ + s + | eof_last_state: last_state, + tokens: Enum.reverse([:eof | s.tokens]), + errors: Enum.reverse(s.errors) + } + end + + # § tokenizer-data-state + + defp data(<>, s) do + character_reference(html, %{s | return_state: :data}) + end + + defp data(<>, s) do + tag_open(html, s) + end + + defp data(<<0, html::binary>>, s) do + data(html, %{s | tokens: append_char_token(s, 0)}) + end + + defp data("", s) do + eof(:data, s) + end + + defp data(<>, s) do + data(html, %{s | tokens: append_char_token(s, c)}) + end + + # § tokenizer-rcdata-state: re-entrant + + @spec rcdata(binary(), %State{}) :: %State{} + def rcdata(<>, s) do + character_reference(html, %{s | return_state: :rcdata}) + end + + def rcdata(<>, s) do + rcdata_less_than_sign(html, s) + end + + def rcdata(<<0, html::binary>>, s) do + rcdata(html, %{s | tokens: append_char_token(s, @replacement_char)}) + end + + def rcdata("", s) do + eof(:rcdata, s) + end + + def rcdata(<>, s) do + rcdata(html, %{s | tokens: append_char_token(s, c)}) + end + + # § tokenizer-rawtext-state: re-entrant + + @spec rawtext(binary(), State.t()) :: State.t() + def rawtext(<>, s) do + rawtext_less_than_sign(html, s) + end + + def rawtext(<<0, html::binary>>, s) do + rawtext(html, %{s | tokens: append_char_token(s, @replacement_char)}) + end + + def rawtext("", s) do + eof(:rawtext, s) + end + + def rawtext(<>, s) do + rawtext(html, %{s | tokens: append_char_token(s, c)}) + end + + # § tokenizer-script-data-state: re-entrant + + @spec script_data(binary(), State.t()) :: State.t() + def script_data(<>, s) do + script_data_less_than_sign(html, s) + end + + def script_data(<<0, html::binary>>, s) do + script_data(html, %{s | tokens: append_char_token(s, @replacement_char)}) + end + + def script_data("", s) do + eof(:script_data, s) + end + + def script_data(<>, s) do + 
script_data(html, %{ + s + | tokens: append_char_token(s, c) + }) + end + + # § tokenizer-plaintext-state: re-entrant + + @spec plaintext(binary(), State.t()) :: State.t() + def plaintext(<<0, html::binary>>, s) do + plaintext(html, %{s | tokens: append_char_token(s, @replacement_char)}) + end + + def plaintext("", s) do + eof(:plaintext, s) + end + + def plaintext(<>, s) do + plaintext(html, %{s | tokens: append_char_token(s, c)}) + end + + # § tokenizer-tag-open-state + + defp tag_open(<>, s) do + markup_declaration_open(html, s) + end + + defp tag_open(<>, s) do + end_tag_open(html, s) + end + + defp tag_open(html = <>, s) + when is_letter(c) do + token = %StartTag{name: ""} + + tag_name(html, %{s | token: token}) + end + + defp tag_open(html = <>, s) do + token = %Comment{data: ""} + + bogus_comment(html, %{s | token: token}) + end + + defp tag_open(html, s) do + data(html, %{ + s + | token: nil, + tokens: append_char_token(s, @less_than_sign) + }) + end + + # § tokenizer-end-tag-open-state + + defp end_tag_open(html = <>, s) + when is_letter(c) do + token = %EndTag{name: ""} + + tag_name(html, %{s | token: token}) + end + + defp end_tag_open(<, html::binary>>, s) do + data(html, %{s | token: nil}) + end + + defp end_tag_open("", s) do + eof(:data, %{ + s + | token: nil, + tokens: append_char_token(s, [@less_than_sign, @solidus]), + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp end_tag_open(html, s) do + token = %Comment{data: ""} + + bogus_comment(html, %{s | token: token}) + end + + # § tokenizer-tag-name-state + + defp tag_name(<>, s) + when is_space(c) do + before_attribute_name(html, s) + end + + defp tag_name(<>, s) do + self_closing_start_tag(html, s) + end + + defp tag_name(<, html::binary>>, s) do + data(html, %{ + s + | last_start_tag: s.token, + tokens: [s.emit.(s.token) | s.tokens], + token: nil + }) + end + + defp tag_name(<>, s) + when is_upper_letter(c) do + new_token = %{s.token | name: [s.token.name | [c + 32]]} + + tag_name(html, %{s | token: new_token}) + end + + defp tag_name(<<0, html::binary>>, s) do + tag_name(html, %{ + s + | token: %{s.token | name: [s.token.name | [@replacement_char]]}, + errors: [ + {:parse_error, "unexpected-null-character"} + | s.errors + ] + }) + end + + defp tag_name("", s) do + eof(:tag_name, %{ + s + | errors: [{:parse_error, "eof-in-tag"} | s.errors] + }) + end + + defp tag_name(<>, s) do + new_token = %{s.token | name: [s.token.name | [c]]} + + tag_name(html, %{s | token: new_token}) + end + + # § tokenizer-rcdata-less-than-sign-state + + defp rcdata_less_than_sign(<>, s) do + rcdata_end_tag_open(html, %{s | buffer: ""}) + end + + defp rcdata_less_than_sign(html, s) do + rcdata(html, %{ + s + | token: nil, + tokens: append_char_token(s, @less_than_sign) + }) + end + + # § tokenizer-rcdata-end-tag-open-state + + defp rcdata_end_tag_open( + html = <>, + s + ) + when is_letter(c) do + token = %EndTag{name: ""} + rcdata_end_tag_name(html, %{s | token: token}) + end + + defp rcdata_end_tag_open(html, s) do + rcdata(html, %{s | tokens: append_char_token(s, [@less_than_sign, @solidus])}) + end + + # § tokenizer-rcdata-end-tag-name-state + + defp rcdata_end_tag_name(html = <>, s) + when is_space(c) do + if appropriate_tag?(s) do + before_attribute_name(rest, s) + else + rcdata(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp rcdata_end_tag_name(html = <>, s) do + if appropriate_tag?(s) do + self_closing_start_tag(rest, s) + else + rcdata(html, %{ + s + | tokens: 
tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp rcdata_end_tag_name(html = <, rest::binary>>, s) do + if appropriate_tag?(s) do + data(rest, %{ + s + | token: nil, + tokens: [s.emit.(s.token) | s.tokens] + }) + else + rcdata(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + # TODO: should we always declare %State{}? + defp rcdata_end_tag_name(<>, %State{} = s) + when is_upper_letter(c) do + c_downcased = c + 32 + new_token = %{s.token | name: [s.token.name | [c_downcased]]} + + rcdata(html, %{s | token: new_token, buffer: [s.buffer | [c_downcased]]}) + end + + defp rcdata_end_tag_name(<>, s) + when is_lower_letter(c) do + col = s.col + 1 + new_token = %{s.token | name: [s.name | [c]]} + + rcdata_end_tag_name(html, %{s | token: new_token, buffer: [s.buffer | [c]], col: col}) + end + + defp rcdata_end_tag_name(html, s) do + rcdata(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + + # § tokenizer-rawtext-end-tag-name-state + + defp rawtext_end_tag_name(html = <>, s) + when is_space(c) do + if appropriate_tag?(s) do + before_attribute_name(rest, s) + else + rawtext(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp rawtext_end_tag_name(html = <<"/", rest::binary>>, s) do + if appropriate_tag?(s) do + self_closing_start_tag(rest, s) + else + rawtext(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp rawtext_end_tag_name(html = <<">", rest::binary>>, s) do + if appropriate_tag?(s) do + data(rest, %{ + s + | token: nil, + tokens: [s.emit.(s.token) | s.tokens] + }) + else + rawtext(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp rawtext_end_tag_name(<>, s) + when is_upper_letter(c) do + new_token = %{s.token | name: [s.token.name | [c + 32]]} + + rawtext(html, %{s | token: new_token, buffer: [s.buffer | [c]]}) + end + + defp rawtext_end_tag_name(<>, s) + when is_lower_letter(c) do + col = s.col + 1 + new_token = %{s.token | name: [s.name | [c]]} + + rawtext_end_tag_name(html, %{s | token: new_token, buffer: [s.buffer | [c]], col: col}) + end + + defp rawtext_end_tag_name(html, s) do + rawtext(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + + # § tokenizer-script-data-end-tag-name-state + + defp script_data_end_tag_name(html = <>, s) + when is_space(c) do + if appropriate_tag?(s) do + before_attribute_name(rest, s) + else + script_data(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp script_data_end_tag_name(html = <>, s) do + if appropriate_tag?(s) do + self_closing_start_tag(rest, s) + else + script_data(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp script_data_end_tag_name(html = <, rest::binary>>, s) do + if appropriate_tag?(s) do + data(rest, %{ + s + | token: nil, + tokens: [s.emit.(s.token) | s.tokens] + }) + else + script_data(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + defp script_data_end_tag_name(<>, s) + when is_upper_letter(c) do + c_downcased = c + 32 + new_token = %{s.token | name: [s.token.name | [c_downcased]]} + + script_data(html, %{s | token: new_token, buffer: [s.buffer | [c_downcased]]}) + end + + defp script_data_end_tag_name(<>, s) + when is_lower_letter(c) do + new_token = %{s.token | name: [s.name | [c]]} + + 
script_data_end_tag_name(html, %{s | token: new_token, buffer: [s.buffer | [c]]}) + end + + defp script_data_end_tag_name(html, s) do + script_data(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + + # § tokenizer-script-data-escaped-end-tag-name-state: re-entrant + + @spec script_data_escaped_end_tag_name(binary(), State.t()) :: State.t() + def script_data_escaped_end_tag_name(html = <>, s) + when is_space(c) do + if appropriate_tag?(s) do + before_attribute_name(rest, s) + else + script_data_escaped(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + def script_data_escaped_end_tag_name(html = <>, s) do + if appropriate_tag?(s) do + self_closing_start_tag(rest, s) + else + script_data_escaped(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + def script_data_escaped_end_tag_name(html = <, rest::binary>>, s) do + if appropriate_tag?(s) do + data(rest, %{ + s + | token: nil, + tokens: [s.emit.(s.token) | s.tokens] + }) + else + script_data_escaped(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + end + + def script_data_escaped_end_tag_name(<>, s) + when is_upper_letter(c) do + new_token = %{s.token | name: [s.name | [c + 32]]} + + script_data_escaped(html, %{s | token: new_token, buffer: [s.buffer | [c]]}) + end + + def script_data_escaped_end_tag_name(<>, s) + when is_lower_letter(c) do + new_token = %{s.token | name: [s.token.name | [c]]} + + script_data_escaped_end_tag_name(html, %{ + s + | token: new_token, + buffer: [s.buffer | [c]] + }) + end + + def script_data_escaped_end_tag_name(html, s) do + script_data_escaped(html, %{ + s + | tokens: tokens_for_inappropriate_end_tag(s), + buffer: "" + }) + end + + # § tokenizer-rawtext-less-than-sign-state + + defp rawtext_less_than_sign(<>, s) do + rawtext_end_tag_open(html, %{s | buffer: ""}) + end + + defp rawtext_less_than_sign(html, s) do + rawtext(html, %{s | tokens: append_char_token(s, 0x003C)}) + end + + # § tokenizer-rawtext-end-tag-open-state + + defp rawtext_end_tag_open( + html = <>, + s + ) + when is_letter(c) do + token = %EndTag{name: ""} + rawtext_end_tag_name(html, %{s | token: token}) + end + + defp rawtext_end_tag_open(html, s) do + rawtext(html, %{s | tokens: append_char_token(s, [@less_than_sign, @solidus])}) + end + + # § tokenizer-script-data-less-than-sign-state + + defp script_data_less_than_sign(<>, s) do + script_data_end_tag_open(html, %{s | buffer: ""}) + end + + defp script_data_less_than_sign(<>, s) do + script_data_less_than_sign(html, %{ + s + | tokens: append_char_token(s, [@less_than_sign, @exclamation_mark]) + }) + end + + defp script_data_less_than_sign(html, s) do + script_data(html, %{s | tokens: append_char_token(s, @less_than_sign)}) + end + + # § tokenizer-script-data-end-tag-open-state + + defp script_data_end_tag_open( + html = <>, + s + ) + when is_letter(c) do + end_tag = %EndTag{name: ""} + script_data_end_tag_name(html, %{s | token: end_tag}) + end + + defp script_data_end_tag_open(html, s) do + script_data(html, %{ + s + | tokens: append_char_token(s, [@less_than_sign, @solidus]) + }) + end + + # § tokenizer-script-data-escape-start-state: re-entrant + + @spec script_data_escape_start(binary(), State.t()) :: State.t() + def script_data_escape_start(<>, s) do + script_data_escape_start_dash( + html, + %{ + s + | tokens: append_char_token(s, @hyphen_minus) + } + ) + end + + def script_data_escape_start(html, s) do + script_data(html, 
s) + end + + # § tokenizer-script-data-escape-start-dash-state + + defp script_data_escape_start_dash(<>, s) do + script_data_escaped_dash_dash( + html, + %{ + s + | tokens: append_char_token(s, @hyphen_minus) + } + ) + end + + defp script_data_escape_start_dash(html, s) do + script_data(html, s) + end + + # § tokenizer-script-data-escaped-state + + defp script_data_escaped(<>, s) do + script_data_escaped_dash( + html, + %{s | tokens: append_char_token(s, @hyphen_minus)} + ) + end + + defp script_data_escaped(<>, s) do + script_data_escaped_less_than_sign(html, s) + end + + defp script_data_escaped(<<0, html::binary>>, s) do + script_data_escaped(html, %{s | tokens: append_char_token(s, @replacement_char)}) + end + + defp script_data_escaped("", s) do + eof(:script_data_escaped, s) + end + + defp script_data_escaped(<>, s) do + script_data_escaped(html, %{s | tokens: append_char_token(s, c)}) + end + + # § tokenizer-script-data-escaped-dash-state + + defp script_data_escaped_dash(<>, s) do + script_data_escaped_dash_dash( + html, + %{ + s + | tokens: append_char_token(s, @hyphen_minus) + } + ) + end + + defp script_data_escaped_dash(<>, s) do + script_data_escaped_less_than_sign(html, s) + end + + defp script_data_escaped_dash(<<0, html::binary>>, s) do + script_data_escaped(html, %{ + s + | tokens: append_char_token(s, @replacement_char) + }) + end + + defp script_data_escaped_dash("", s) do + eof(:tokenize, s) + end + + defp script_data_escaped_dash( + <>, + s + ) do + script_data_escaped(html, %{ + s + | tokens: append_char_token(s, c) + }) + end + + # § tokenizer-script-data-escaped-dash-dash-state + + defp script_data_escaped_dash_dash(<>, s) do + script_data_escaped_dash_dash( + html, + %{s | tokens: append_char_token(s, @hyphen_minus)} + ) + end + + defp script_data_escaped_dash_dash(<>, s) do + script_data_escaped_less_than_sign(html, s) + end + + defp script_data_escaped_dash_dash(<, html::binary>>, s) do + script_data(html, %{ + s + | tokens: append_char_token(s, @greater_than_sign) + }) + end + + defp script_data_escaped_dash_dash(<<0, html::binary>>, s) do + script_data_escaped(html, %{ + s + | tokens: append_char_token(s, @replacement_char) + }) + end + + defp script_data_escaped_dash_dash("", s) do + eof(:script_data_escaped_dash_dash, s) + end + + defp script_data_escaped_dash_dash( + <>, + s + ) do + script_data_escaped(html, %{ + s + | tokens: append_char_token(s, <>) + }) + end + + # § tokenizer-script-data-escaped-less-than-sign-state + + defp script_data_escaped_less_than_sign(<>, s) do + script_data_escaped_end_tag_open(html, %{s | buffer: ""}) + end + + defp script_data_escaped_less_than_sign( + html = <>, + s + ) + when is_lower_letter(c) or is_upper_letter(c) do + # TODO: revert this after implement the script_data_double_scape_start state + # script_data_double_escape_start( + data( + html, + %{ + s + | buffer: "", + tokens: append_char_token(s, @less_than_sign) + } + ) + end + + defp script_data_escaped_less_than_sign(html, s) do + script_data_escaped(html, %{ + s + | tokens: append_char_token(s, @less_than_sign) + }) + end + + # § tokenizer-script-data-escaped-end-tag-open-state + + defp script_data_escaped_end_tag_open( + html = <>, + s + ) + when is_lower_letter(c) or is_upper_letter(c) do + script_data_escaped_end_tag_name( + html, + %{ + s + | token: %EndTag{name: ""} + } + ) + end + + defp script_data_escaped_end_tag_open(html, s) do + script_data_escaped(html, %{ + s + | tokens: append_char_token(s, [@less_than_sign, @solidus]) + }) + end + + # § 
tokenizer-script-data-double-escape-start-state: re-entrant + + @spec script_data_double_escaped_end_tag_open(binary(), State.t()) :: State.t() + def script_data_double_escaped_end_tag_open( + <>, + s + ) + when c in [@solidus, @greater_than_sign | @space_chars] do + s = %{s | tokens: append_char_token(s, <>)} + + if s.buffer && IO.chardata_to_string(s.buffer) == "script" do + script_data_double_escaped(html, s) + else + script_data_escaped(html, s) + end + end + + def script_data_double_escaped_end_tag_open( + <>, + s + ) + when is_upper_letter(c) do + script_data_double_escaped_end_tag_open(html, %{ + s + | buffer: [s.buffer, c + 32], + tokens: append_char_token(s, c) + }) + end + + def script_data_double_escaped_end_tag_open( + <>, + s + ) + when is_lower_letter(c) do + script_data_double_escaped_end_tag_open(html, %{ + s + | buffer: [s.buffer, c], + tokens: append_char_token(s, c) + }) + end + + def script_data_double_escaped_end_tag_open(html, s) do + script_data_escaped(html, s) + end + + # § tokenizer-script-data-double-escaped-state: re-entrant + + @spec script_data_double_escaped(binary(), State.t()) :: State.t() + def script_data_double_escaped(<>, s) do + script_data_double_escaped_dash(html, %{ + s + | tokens: append_char_token(s, @hyphen_minus) + }) + end + + def script_data_double_escaped(<>, s) do + script_data_double_escaped_less_than_sign(html, %{ + s + | tokens: append_char_token(s, @less_than_sign) + }) + end + + def script_data_double_escaped(<<0, html::binary>>, s) do + script_data_double_escaped(html, %{s | tokens: append_char_token(s, @replacement_char)}) + end + + def script_data_double_escaped("", s) do + eof(:script_data_double_escaped, s) + end + + def script_data_double_escaped(<>, s) do + script_data_double_escaped(html, %{s | tokens: append_char_token(s, c)}) + end + + # § tokenizer-script-data-double-escaped-dash-state + + defp script_data_double_escaped_dash(<>, s) do + script_data_double_escaped_dash_dash(html, %{ + s + | tokens: append_char_token(s, @hyphen_minus) + }) + end + + defp script_data_double_escaped_dash(<>, s) do + script_data_double_escaped_less_than_sign(html, %{ + s + | tokens: append_char_token(s, @less_than_sign) + }) + end + + defp script_data_double_escaped_dash(<<0, html::binary>>, s) do + script_data_double_escaped(html, %{ + s + | tokens: append_char_token(s, @replacement_char) + }) + end + + defp script_data_double_escaped_dash("", s) do + eof(:script_data_double_escaped_dash, s) + end + + defp script_data_double_escaped_dash(<>, s) do + script_data_double_escaped(html, %{ + s + | tokens: append_char_token(s, c) + }) + end + + # § tokenizer-script-data-double-escaped-dash-dash-state + + defp script_data_double_escaped_dash_dash(<>, s) do + script_data_double_escaped_dash_dash(html, %{ + s + | tokens: append_char_token(s, @hyphen_minus) + }) + end + + defp script_data_double_escaped_dash_dash(<>, s) do + script_data_double_escaped_less_than_sign(html, %{ + s + | tokens: append_char_token(s, @less_than_sign) + }) + end + + defp script_data_double_escaped_dash_dash(<, html::binary>>, s) do + script_data(html, %{ + s + | tokens: append_char_token(s, @greater_than_sign) + }) + end + + defp script_data_double_escaped_dash_dash( + <<0, html::binary>>, + s + ) do + script_data_double_escaped(html, %{ + s + | tokens: append_char_token(s, @replacement_char) + }) + end + + defp script_data_double_escaped_dash_dash("", s) do + eof(:script_data_double_escaped_dash_dash, s) + end + + defp script_data_double_escaped_dash_dash( + <>, + s + ) do + 
script_data_double_escaped(html, %{ + s + | tokens: append_char_token(s, c) + }) + end + + # § tokenizer-script-data-double-escaped-less-than-sign-state + + defp script_data_double_escaped_less_than_sign( + <>, + s + ) do + script_data_double_escape_end(html, %{ + s + | buffer: "", + tokens: append_char_token(s, @solidus) + }) + end + + defp script_data_double_escaped_less_than_sign(html, s) do + script_data_double_escaped(html, s) + end + + # § tokenizer-script-data-double-escape-end-state + + defp script_data_double_escape_end( + <>, + s + ) + when c in [?/, ?> | @space_chars] do + if IO.chardata_to_string(s.buffer) == "script" do + script_data_escaped(html, %{s | tokens: append_char_token(s, c)}) + else + script_data_double_escaped(html, %{s | tokens: append_char_token(s, c)}) + end + end + + defp script_data_double_escape_end( + <>, + s + ) + when is_upper_letter(c) do + script_data_double_escape_end(html, %{ + s + | buffer: [s.buffer | [c + 32]], + tokens: append_char_token(s, c) + }) + end + + defp script_data_double_escape_end( + <>, + s + ) + when is_lower_letter(c) do + script_data_double_escape_end(html, %{ + s + | buffer: [s.buffer | [c]], + tokens: append_char_token(s, c) + }) + end + + defp script_data_double_escape_end(html, s) do + script_data_double_escaped(html, s) + end + + # § tokenizer-before-attribute-name-state + + defp before_attribute_name(<>, s) + when is_space(c) do + before_attribute_name(html, s) + end + + defp before_attribute_name(html = <>, s) + when c in [?/, ?>] do + after_attribute_name(html, s) + end + + defp before_attribute_name("", s) do + after_attribute_name("", s) + end + + defp before_attribute_name(<>, s) do + new_token = %StartTag{ + s.token + | attributes: [ + %Attribute{name: "=", value: ""} | s.token.attributes + ] + } + + attribute_name(html, %{ + s + | errors: [{:parse_error, nil} | s.errors], + token: new_token + }) + end + + defp before_attribute_name(html, s) do + # NOTE: token here can be a StartTag or EndTag. Attributes on end tags will be ignored. + new_token = %{ + s.token + | attributes: [ + %Attribute{name: "", value: ""} | s.token.attributes + ] + } + + attribute_name(html, %{ + s + | token: new_token + }) + end + + # § tokenizer-attribute-name-state + + defp attribute_name(html = <>, s) + when c in [@solidus, @greater_than_sign | @space_chars] do + # FIXME: before changing the state, verify if same attr already exists. + after_attribute_name(html, s) + end + + defp attribute_name("", s) do + # FIXME: before changing the state, verify if same attr already exists. + after_attribute_name("", s) + end + + defp attribute_name(<>, s) do + # FIXME: before changing the state, verify if same attr already exists. 
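+    # Illustrative note (not part of the original diff): per the WHATWG spec,
+    # a duplicate attribute such as <div id="a" id="b"> should keep only the
+    # first id and report a "duplicate-attribute" parse error; that check is
+    # what the FIXME above refers to and is not implemented yet.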
+ before_attribute_value(html, s) + end + + defp attribute_name(<>, s) + when is_upper_letter(c) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | name: [attr.name | [c + 32]]} + new_token = %StartTag{s.token | attributes: [new_attr | attrs]} + + attribute_name(html, %{s | token: new_token}) + end + + defp attribute_name(<<0, html::binary>>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | name: [attr.name | [@replacement_char]]} + new_token = %StartTag{s.token | attributes: [new_attr | attrs]} + + attribute_name(html, %{s | token: new_token}) + end + + defp attribute_name(<>, s) + when c in [?", ?', ?<] do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | name: [attr.name | [c]]} + new_token = %StartTag{s.token | attributes: [new_attr | attrs]} + + attribute_name(html, %{ + s + | errors: [{:parse_error, nil} | s.errors], + token: new_token + }) + end + + defp attribute_name(<>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | name: [attr.name | [c]]} + + # NOTE: token here can be a StartTag or EndTag. Attributes on end tags will be ignored. + new_token = %{s.token | attributes: [new_attr | attrs]} + + attribute_name(html, %{s | token: new_token}) + end + + # § tokenizer-after-attribute-name-state + + defp after_attribute_name(<>, s) + when is_space(c) do + after_attribute_name(html, s) + end + + defp after_attribute_name(<>, s) do + self_closing_start_tag(html, s) + end + + defp after_attribute_name(<>, s) do + before_attribute_value(html, s) + end + + defp after_attribute_name(<, html::binary>>, s) do + data(html, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil + }) + end + + defp after_attribute_name("", s) do + eof(:data, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_attribute_name(html, s) do + attribute = %Attribute{name: "", value: ""} + new_token = %StartTag{s.token | attributes: [attribute | s.token.attributes]} + + attribute_name(html, %{s | token: new_token}) + end + + # § tokenizer-before-attribute-value-state + + defp before_attribute_value(<>, s) + when is_space(c) do + before_attribute_value(html, s) + end + + defp before_attribute_value(<>, s) do + attribute_value_double_quoted(html, s) + end + + defp before_attribute_value(<>, s) do + attribute_value_single_quoted(html, s) + end + + defp before_attribute_value(html = <, _rest::binary>>, s) do + attribute_value_unquoted(html, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_attribute_value(html, s) do + attribute_value_unquoted(html, s) + end + + # § tokenizer-attribute-value-double-quoted-state + + defp attribute_value_double_quoted(<>, s) do + after_attribute_value_quoted(html, s) + end + + defp attribute_value_double_quoted(<>, s) do + character_reference(html, %{s | return_state: :attribute_value_double_quoted}) + end + + defp attribute_value_double_quoted(<<0, html::binary>>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | [@replacement_char]]} + + attribute_value_double_quoted(html, %{ + s + | errors: [{:parse_error, nil} | s.errors], + token: %StartTag{s.token | attributes: [new_attr | attrs]} + }) + end + + defp attribute_value_double_quoted("", s) do + eof(:attribute_value_double_quoted, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + defp attribute_value_double_quoted(<>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | [c]]} + + 
attribute_value_double_quoted(html, %{ + s + | token: %StartTag{s.token | attributes: [new_attr | attrs]} + }) + end + + # § tokenizer-attribute-value-single-quoted-state + + defp attribute_value_single_quoted(<>, s) do + after_attribute_value_quoted(html, s) + end + + defp attribute_value_single_quoted(<>, s) do + character_reference(html, %{s | return_state: :attribute_value_single_quoted}) + end + + defp attribute_value_single_quoted(<<0, html::binary>>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | [@replacement_char]]} + + attribute_value_single_quoted(html, %{ + s + | errors: [{:parse_error, nil} | s.errors], + token: %StartTag{s.token | attributes: [new_attr | attrs]} + }) + end + + defp attribute_value_single_quoted("", s) do + eof(:attribute_value_single_quoted, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + defp attribute_value_single_quoted(<>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | [c]]} + + # NOTE: token here can be a StartTag or EndTag. Attributes on end tags will be ignored. + attribute_value_single_quoted(html, %{ + s + | token: %{s.token | attributes: [new_attr | attrs]} + }) + end + + # § tokenizer-attribute-value-unquoted-state + + defp attribute_value_unquoted(<>, s) when is_space(c) do + before_attribute_name(html, s) + end + + defp attribute_value_unquoted(<>, s) do + character_reference(html, %{s | return_state: :attribute_value_unquoted}) + end + + defp attribute_value_unquoted(<, html::binary>>, s) do + data(html, %{s | tokens: [s.emit.(s.token) | s.tokens], token: nil}) + end + + defp attribute_value_unquoted(<<0, html::binary>>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | [@replacement_char]]} + + attribute_value_unquoted(html, %{ + s + | errors: [{:parse_error, nil} | s.errors], + token: %{s.token | attributes: [new_attr | attrs]} + }) + end + + defp attribute_value_unquoted(<>, s) + when c in [?", ?', ?<, ?=, ?`] do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | [c]]} + + attribute_value_unquoted(html, %{ + s + | errors: [{:parse_error, nil} | s.errors], + token: %{s.token | attributes: [new_attr | attrs]} + }) + end + + defp attribute_value_unquoted("", s) do + eof(:attribute_value_unquoted, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + defp attribute_value_unquoted(<>, s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | [c]]} + + attribute_value_unquoted(html, %{ + s + | token: %{s.token | attributes: [new_attr | attrs]} + }) + end + + # § tokenizer-after-attribute-value-quoted-state + + defp after_attribute_value_quoted(<>, s) + when is_space(c) do + before_attribute_name(html, s) + end + + defp after_attribute_value_quoted(<>, s) do + self_closing_start_tag(html, s) + end + + defp after_attribute_value_quoted(<, html::binary>>, s) do + data(html, %{s | tokens: [s.emit.(s.token) | s.tokens], token: nil}) + end + + defp after_attribute_value_quoted("", s) do + eof(:after_attribute_value_quoted, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_attribute_value_quoted(html, s) do + before_attribute_name(html, s) + end + + # § tokenizer-self-closing-start-tag-state + + defp self_closing_start_tag(<, html::binary>>, s) do + tag = %StartTag{s.token | self_close: true} + data(html, %{s | tokens: [tag | s.tokens], token: nil}) + end + + defp 
self_closing_start_tag("", s) do + eof(:self_closing_start_tag, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + defp self_closing_start_tag(html, s) do + before_attribute_name(html, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-bogus-comment-state + + defp bogus_comment(<, html::binary>>, s) do + data(html, %{s | tokens: [s.emit.(s.token) | s.tokens], token: nil}) + end + + defp bogus_comment("", s) do + eof(:bogus_comment, %{s | tokens: [s.emit.(s.token) | s.tokens], token: nil}) + end + + defp bogus_comment(<<0, html::binary>>, s) do + comment = %Comment{s.token | data: [s.token.data | [@replacement_char]]} + + bogus_comment(html, %{s | token: comment}) + end + + defp bogus_comment(<>, s) do + comment = %Comment{s.token | data: [s.token.data | [c]]} + + bogus_comment(html, %{s | token: comment}) + end + + # § tokenizer-markup-declaration-open-state + + defp markup_declaration_open(<<"--", html::binary>>, s) do + token = %Comment{data: ""} + + comment_start( + html, + %{s | token: token} + ) + end + + defp markup_declaration_open( + <>, + s + ) + when d in [?D, ?d] and o in [?O, ?o] and c in [?C, ?c] and + t in [?T, ?t] and y in [?Y, ?y] and + p in [?P, ?p] and e in [?E, ?e] do + doctype(html, s) + end + + # TODO: fix the check for adjusted current node in HTML namespace + defp markup_declaration_open(<<"[CDATA[", html::binary>>, s = %State{adjusted_current_node: n}) + when not is_nil(n) do + cdata_section(html, s) + end + + defp markup_declaration_open(html, s) do + bogus_comment(html, %{ + s + | token: %Comment{}, + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-comment-start-state + + defp comment_start(<>, s) do + comment_start_dash(html, s) + end + + defp comment_start(<, html::binary>>, s) do + data(html, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp comment_start(html, s) do + comment(html, s) + end + + # § tokenizer-comment-start-dash-state + + defp comment_start_dash(<>, s) do + comment_end(html, s) + end + + defp comment_start_dash(<, html::binary>>, s) do + data(html, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp comment_start_dash("", s) do + eof(:comment_start_dash, %{ + s + | errors: [{:parse_error, nil} | s.errors], + tokens: [s.emit.(s.token) | s.tokens], + token: nil + }) + end + + defp comment_start_dash(html, s) do + new_comment = %Comment{s.token | data: [s.token.data | [@hyphen_minus]]} + + comment(html, %{s | token: new_comment}) + end + + # § tokenizer-comment-state + + defp comment(<>, s) do + new_comment = %Comment{s.token | data: [s.token.data | [@less_than_sign]]} + + comment_less_than_sign(html, %{s | token: new_comment}) + end + + defp comment(<>, s) do + comment_end_dash(html, s) + end + + defp comment(<<0, html::binary>>, s) do + new_comment = %Comment{s.token | data: [s.token.data | [@replacement_char]]} + + comment(html, %{ + s + | token: new_comment, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp comment("", s) do + eof(:comment, %{ + s + | errors: [{:parse_error, nil} | s.errors], + tokens: [s.emit.(s.token) | s.tokens], + token: nil + }) + end + + defp comment(<>, s) do + new_token = %Comment{s.token | data: [s.token.data | [c]]} + + comment( + html, + %{s | token: new_token} + ) + end + + # § tokenizer-comment-less-than-sign-state + + defp comment_less_than_sign(<>, s) do + new_comment = 
%Comment{s.token | data: [s.token.data | [@exclamation_mark]]} + + comment_less_than_sign_bang(html, %{s | token: new_comment}) + end + + defp comment_less_than_sign(<>, s) do + new_comment = %Comment{s.token | data: [s.token.data | [@less_than_sign]]} + + comment_less_than_sign(html, %{s | token: new_comment}) + end + + defp comment_less_than_sign(html, s) do + comment(html, s) + end + + # § tokenizer-comment-less-than-sign-bang-state + + defp comment_less_than_sign_bang(<>, s) do + comment_less_than_sign_bang_dash(html, s) + end + + defp comment_less_than_sign_bang(html, s) do + comment(html, s) + end + + # § tokenizer-comment-less-than-sign-bang-dash-state + + defp comment_less_than_sign_bang_dash(<>, s) do + comment_less_than_sign_bang_dash_dash(html, s) + end + + defp comment_less_than_sign_bang_dash(html, s) do + comment_end_dash(html, s) + end + + # § tokenizer-comment-less-than-sign-bang-dash-dash-state + + defp comment_less_than_sign_bang_dash_dash(html = <, _rest::binary>>, s) do + comment_end(html, s) + end + + defp comment_less_than_sign_bang_dash_dash(html = "", s) do + comment_end(html, s) + end + + defp comment_less_than_sign_bang_dash_dash(html, s) do + comment_end(html, %{s | errors: [{:parse_error, nil} | s.errors]}) + end + + # § tokenizer-comment-end-dash-state + + defp comment_end_dash(<>, s) do + comment_end(html, s) + end + + defp comment_end_dash("", s) do + eof(:comment_end_dash, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp comment_end_dash(html, s) do + new_comment = %Comment{s.token | data: [s.token.data | [@hyphen_minus]]} + + comment(html, %{s | token: new_comment}) + end + + # § tokenizer-comment-end-state + + defp comment_end(<, html::binary>>, s) do + data( + html, + %{s | tokens: [s.emit.(s.token) | s.tokens], token: nil} + ) + end + + defp comment_end(<>, s) do + comment_end_bang(html, s) + end + + defp comment_end(<>, s) do + new_comment = %Comment{s.token | data: [s.token.data | [@hyphen_minus]]} + + comment_end(html, %{s | token: new_comment}) + end + + defp comment_end("", s) do + eof(:comment_end, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp comment_end(html, s) do + new_comment = %Comment{s.token | data: [s.token.data | "--"]} + + comment(html, %{s | token: new_comment}) + end + + # § tokenizer-comment-end-bang-state + + defp comment_end_bang(<>, s) do + new_comment = %Comment{s.token | data: [s.token.data | "--!"]} + + comment_end_dash(html, %{s | token: new_comment}) + end + + defp comment_end_bang(<, html::binary>>, s) do + data(html, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp comment_end_bang("", s) do + eof(:comment_end_bang, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp comment_end_bang(html, s) do + new_comment = %Comment{s.token | data: [s.token.data | "--!"]} + + comment(html, %{s | token: new_comment}) + end + + # § tokenizer-doctype-state + + defp doctype(<>, s) + when is_space(c) do + before_doctype_name(html, s) + end + + defp doctype("", s) do + doctype_token = %Doctype{force_quirks: :on} + eof(:doctype, %{s | tokens: [doctype_token | s.tokens], token: nil}) + end + + defp doctype(html, s) do + before_doctype_name(html, %{ + s + | errors: [{:parse_error, nil} | s.errors] + }) + end + + # § 
tokenizer-before-doctype-name-state + + defp before_doctype_name(<>, s) + when is_space(c) do + before_doctype_name(html, s) + end + + defp before_doctype_name(<>, s) + when is_upper_letter(c) do + token = %Doctype{name: [c + 32]} + + doctype_name(html, %{s | token: token}) + end + + defp before_doctype_name(<<0, html::binary>>, s) do + token = %Doctype{ + name: [@replacement_char], + force_quirks: :on + } + + doctype_name(html, %{s | token: token}) + end + + defp before_doctype_name(<, html::binary>>, s) do + token = %Doctype{ + force_quirks: :on + } + + data(html, %{ + s + | tokens: [token | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_name("", s) do + token = %Doctype{ + force_quirks: :on + } + + eof(:before_doctype_name, %{ + s + | tokens: [token | s.tokens], + token: nil, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_name(<>, s) do + token = %Doctype{ + name: [c] + } + + doctype_name(html, %{s | token: token}) + end + + # § tokenizer-doctype-name-state + + defp doctype_name(<>, s) + when is_space(c) do + after_doctype_name(html, s) + end + + defp doctype_name(<, html::binary>>, s) do + data(html, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil + }) + end + + defp doctype_name(<>, s) when is_upper_letter(c) do + new_token = %Doctype{ + s.token + | name: [s.token.name | [c + 32]] + } + + doctype_name(html, %{s | token: new_token}) + end + + defp doctype_name(<<0, html::binary>>, s) do + new_token = %Doctype{s.token | name: [s.token.name | [@replacement_char]]} + + doctype_name(html, %{ + s + | token: new_token, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_name("", s) do + new_token = %Doctype{s.token | force_quirks: :on} + + eof(:doctype_name, %{ + s + | tokens: [new_token | s.tokens], + token: nil, + errors: [{:parse_error, "eof-in-doctype"} | s.errors] + }) + end + + defp doctype_name(<>, s) do + new_token = %Doctype{s.token | name: [s.token.name | [c]]} + + doctype_name(html, %{s | token: new_token}) + end + + # § tokenizer-after-doctype-name-state + + defp after_doctype_name(<>, s) + when is_space(c) do + after_doctype_name(html, s) + end + + defp after_doctype_name(<, html::binary>>, s) do + data(html, %{ + s + | tokens: [s.emit.(s.token) | s.tokens], + token: nil + }) + end + + defp after_doctype_name("", s) do + token = %Doctype{s.token | force_quirks: :on} + + eof(:after_doctype_name, %{ + s + | tokens: [token | s.tokens], + token: nil, + errors: [{:parse_error, "eof-in-doctype"} | s.errors] + }) + end + + defp after_doctype_name( + <>, + s + ) + when p in [?P, ?p] and u in [?U, ?u] and b in [?B, ?b] and + l in [?L, ?l] and i in [?I, ?i] and + c in [?C, ?c] do + after_doctype_public_keyword(html, s) + end + + defp after_doctype_name( + <>, + state + ) + when s1 in [?S, ?s] and y in [?Y, ?y] and + s2 in [?S, ?s] and t in [?T, ?t] and + e in [?E, ?e] and m in [?M, ?m] do + after_doctype_system_keyword(html, state) + end + + defp after_doctype_name(html, s) do + token = %Doctype{s.token | force_quirks: :on} + + bogus_doctype(html, %{ + s + | token: token, + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-after-doctype-public-keyword-state + + defp after_doctype_public_keyword(<>, s) + when is_space(c) do + before_doctype_public_identifier(html, s) + end + + defp after_doctype_public_keyword(<>, s) do + doctype = %Doctype{s.token | public_id: ""} + + doctype_public_identifier_double_quoted(html, %{ + s + | token: doctype, + errors: 
[{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_public_keyword(<>, s) do + doctype = %Doctype{s.token | public_id: ""} + + doctype_public_identifier_single_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_public_keyword(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_public_keyword("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:after_doctype_public_keyword, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_public_keyword(<<_c::utf8, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + bogus_doctype(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-before-doctype-public-identifier-state + + defp before_doctype_public_identifier(<>, s) + when is_space(c) do + before_doctype_public_identifier(html, s) + end + + defp before_doctype_public_identifier(<>, s) do + doctype = %Doctype{s.token | public_id: ""} + + doctype_public_identifier_double_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_public_identifier(<>, s) do + doctype = %Doctype{s.token | public_id: ""} + + doctype_public_identifier_single_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_public_identifier(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_public_identifier("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:before_doctype_public_identifier, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_public_identifier(<<_::utf8, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + bogus_doctype(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-doctype-public-identifier-double-quoted-state + + defp doctype_public_identifier_double_quoted(<>, s) do + after_doctype_public_identifier(html, s) + end + + defp doctype_public_identifier_double_quoted(<<0, html::binary>>, s) do + doctype = %Doctype{s.token | public_id: [s.token.public_id | [@replacement_char]]} + + doctype_public_identifier_double_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_public_identifier_double_quoted(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_public_identifier_double_quoted("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:doctype_public_identifier_double_quoted, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_public_identifier_double_quoted(<>, s) do + doctype = %Doctype{s.token | public_id: [s.token.public_id | [c]]} + + doctype_public_identifier_double_quoted(html, %{s | token: doctype}) + end 
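+  # Illustrative example (not part of the original diff): for an input like
+  # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+  # the states above accumulate the quoted text into the token's :public_id,
+  # yielding roughly %Doctype{name: "html", public_id: "-//W3C//DTD HTML 4.01//EN"}.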
+ + # § tokenizer-doctype-public-identifier-single-quoted-state + + defp doctype_public_identifier_single_quoted(<>, s) do + after_doctype_public_identifier(html, s) + end + + defp doctype_public_identifier_single_quoted(<<0, html::binary>>, s) do + doctype = %Doctype{s.token | public_id: [s.token.public_id | [@replacement_char]]} + + doctype_public_identifier_single_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_public_identifier_single_quoted(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_public_identifier_single_quoted("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:doctype_public_identifier_single_quoted, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_public_identifier_single_quoted(<>, s) do + doctype = %Doctype{s.token | public_id: [s.token.public_id | [c]]} + + doctype_public_identifier_single_quoted(html, %{s | token: doctype}) + end + + # § tokenizer-after-doctype-public-identifier-state + + defp after_doctype_public_identifier(<>, s) when is_space(c) do + between_doctype_public_and_system_identifiers(html, s) + end + + defp after_doctype_public_identifier(<, html::binary>>, s) do + data(html, %{s | token: nil, tokens: [s.emit.(s.token) | s.tokens]}) + end + + defp after_doctype_public_identifier(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_double_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_public_identifier(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_single_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_public_identifier("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:after_doctype_public_identifier, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_public_identifier(<<_c::utf8, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + bogus_doctype(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-between-doctype-public-and-system-identifiers-state + + defp between_doctype_public_and_system_identifiers(<>, s) when is_space(c) do + between_doctype_public_and_system_identifiers(html, s) + end + + defp between_doctype_public_and_system_identifiers(<, html::binary>>, s) do + data(html, %{s | token: nil, tokens: [s.emit.(s.token) | s.tokens]}) + end + + defp between_doctype_public_and_system_identifiers(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_double_quoted(html, %{s | token: doctype}) + end + + defp between_doctype_public_and_system_identifiers(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_single_quoted(html, %{s | token: doctype}) + end + + defp between_doctype_public_and_system_identifiers("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:between_doctype_public_and_system_identifiers, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp between_doctype_public_and_system_identifiers(<<_c::utf8, 
html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + bogus_doctype(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-after-doctype-system-keyword-state + + defp after_doctype_system_keyword(<>, s) when is_space(c) do + before_doctype_system_identifier(html, s) + end + + defp after_doctype_system_keyword(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_double_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_system_keyword(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_single_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_system_keyword(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_system_keyword("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:after_doctype_system_keyword, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_system_keyword(<<_c::utf8, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + bogus_doctype(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-before-doctype-system-identifier-state + + defp before_doctype_system_identifier(<>, s) when is_space(c) do + before_doctype_system_identifier(html, s) + end + + defp before_doctype_system_identifier(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_double_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_system_identifier(<>, s) do + doctype = %Doctype{s.token | system_id: ""} + + doctype_system_identifier_single_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_system_identifier(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_system_identifier("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:before_doctype_system_identifier, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp before_doctype_system_identifier(<<_::utf8, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + bogus_doctype(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-doctype-system-identifier-double-quoted-state + + defp doctype_system_identifier_double_quoted(<>, s) do + after_doctype_system_identifier(html, s) + end + + defp doctype_system_identifier_double_quoted(<<0, html::binary>>, s) do + doctype = %Doctype{s.token | system_id: [s.token.system_id | [@replacement_char]]} + + doctype_system_identifier_double_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_system_identifier_double_quoted(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + 
errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_system_identifier_double_quoted("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:doctype_system_identifier_double_quoted, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_system_identifier_double_quoted(<>, s) do + doctype = %Doctype{s.token | system_id: [s.token.system_id | [c]]} + + doctype_system_identifier_double_quoted(html, %{s | token: doctype}) + end + + # § tokenizer-doctype-system-identifier-single-quoted-state + + defp doctype_system_identifier_single_quoted(<>, s) do + after_doctype_system_identifier(html, s) + end + + defp doctype_system_identifier_single_quoted(<<0, html::binary>>, s) do + doctype = %Doctype{s.token | system_id: [s.token.system_id | [@replacement_char]]} + + doctype_system_identifier_single_quoted(html, %{ + s + | token: doctype, + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_system_identifier_single_quoted(<, html::binary>>, s) do + doctype = %Doctype{s.token | force_quirks: :on} + + data(html, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_system_identifier_single_quoted("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:doctype_system_identifier_single_quoted, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp doctype_system_identifier_single_quoted(<>, s) do + doctype = %Doctype{s.token | system_id: [s.token.system_id | [c]]} + + doctype_system_identifier_single_quoted(html, %{s | token: doctype}) + end + + # § tokenizer-after-doctype-system-identifier-state + + defp after_doctype_system_identifier(<>, s) when is_space(c) do + after_doctype_system_identifier(html, s) + end + + defp after_doctype_system_identifier(<, html::binary>>, s) do + data(html, %{s | token: nil, tokens: [s.emit.(s.token) | s.tokens]}) + end + + defp after_doctype_system_identifier("", s) do + doctype = %Doctype{s.token | force_quirks: :on} + + eof(:after_doctype_system_identifier, %{ + s + | token: nil, + tokens: [doctype | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + defp after_doctype_system_identifier(<<_c::utf8, html::binary>>, s) do + bogus_doctype(html, %{ + s + | token: nil, + tokens: [s.emit.(s.token) | s.tokens], + errors: [{:parse_error, nil} | s.errors] + }) + end + + # § tokenizer-bogus-doctype-state + + defp bogus_doctype(<, html::binary>>, s) do + data(html, %{s | token: nil, tokens: [s.emit.(s.token) | s.tokens]}) + end + + defp bogus_doctype(<<0, html::binary>>, s) do + # TODO: set error + bogus_doctype(html, s) + end + + defp bogus_doctype("", s) do + eof(:bogus_doctype, %{s | token: nil, tokens: [s.emit.(s.token) | s.tokens]}) + end + + defp bogus_doctype(<<_c::utf8, html::binary>>, s) do + bogus_doctype(html, s) + end + + # § tokenizer-cdata-section-state + + defp cdata_section(<>, s) do + cdata_section_bracket(html, s) + end + + defp cdata_section("", s) do + eof(:cdata_section, %{s | errors: [{:parse_error, nil} | s.errors]}) + end + + defp cdata_section(<>, s) do + cdata_section(html, %{s | tokens: append_char_token(s, c)}) + end + + # § tokenizer-cdata-section-bracket-state + + defp cdata_section_bracket(<>, s) do + cdata_section_end(html, s) + end + + defp cdata_section_bracket(html, s) do + cdata_section(html, %{s | tokens: append_char_token(s, ?])}) + end + + # § 
tokenizer-cdata-section-end-state + + defp cdata_section_end(<>, s) do + cdata_section_end(html, %{s | tokens: append_char_token(s, ?])}) + end + + defp cdata_section_end(<, html::binary>>, s) do + data(html, s) + end + + defp cdata_section_end(html, s) do + cdata_section(html, %{s | tokens: append_char_token(s, [?], ?]])}) + end + + # § tokenizer-character-reference-state + + defp character_reference(<> = html, s) + when c in [?<, ?& | @space_chars] do + character_reference_end(html, %{s | buffer: "&"}) + end + + defp character_reference(<>, s) do + numeric_character_reference(html, %{s | buffer: ["&" | [?#]]}) + end + + defp character_reference(html, s) do + seek_charref(html, %{s | buffer: "&", charref_state: %CharrefState{done: false}}) + end + + defp seek_charref( + <>, + s = %State{charref_state: %CharrefState{done: false}} + ) + when c == ?; or is_letter(c) or + is_digit(c) do + buffer = IO.chardata_to_string([s.buffer | [c]]) + candidate = Floki.Entities.get(buffer) + + charref_state = + if candidate != [] do + %CharrefState{s.charref_state | candidate: buffer} + else + s.charref_state + end + + len = charref_state.length + 1 + done_by_length? = len > 60 + done_by_semicolon? = c == ?; + + seek_charref(html, %{ + s + | buffer: buffer, + charref_state: %{ + charref_state + | length: len, + done: done_by_semicolon? || done_by_length? + } + }) + end + + defp seek_charref(html, s) do + charref_state = %CharrefState{s.charref_state | done: true} + + seek_charref_end(html, %{s | charref_state: charref_state}) + end + + defp seek_charref_end(html, s = %State{return_state: return_state}) + when return_state in [ + :attribute_value_double_quoted, + :attribute_value_single_quoted, + :attribute_value_unquoted + ] do + last_char = + s.buffer + |> IO.chardata_to_string() + |> String.codepoints() + |> List.last() + + with true <- last_char != ";", + <> + when c == ?= or is_letter(c) or + is_digit(c) <- html do + character_reference_end(html, s) + else + _ -> + buffer = + if s.buffer == s.charref_state.candidate do + character_buffer(s) + else + s.buffer + end + + character_reference_end(html, %{s | buffer: buffer}) + end + end + + defp seek_charref_end(html, s) do + candidate = s.charref_state.candidate + + ends_with_semicolon? = String.ends_with?(s.buffer, ";") + + parse_error_on_unmatch? = + String.starts_with?(s.buffer, "&") && ends_with_semicolon? && candidate == nil + + parse_error_on_non_semicolon_ending? = !ends_with_semicolon? + + state = + cond do + parse_error_on_unmatch? -> + %{s | errors: [{:parse_error, nil} | s.errors]} + + parse_error_on_non_semicolon_ending? -> + %{ + s + | errors: [ + { + :parse_error, + "missing-semicolon-after-character-reference" + } + | s.errors + ] + } + + true -> + s + end + + buffer = character_buffer(s) + html = charref_html_after_buffer(html, s) + + character_reference_end(html, %{state | buffer: buffer}) + end + + defp character_buffer(%State{charref_state: %CharrefState{candidate: candidate}, buffer: buffer}) do + if candidate do + Floki.Entities.get(candidate) + else + buffer + end + end + + ## Helper functions that modifies the HTML string. + # OPTIMIZE: avoid concatenation of string. 
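+ #
+ # `charref_html_after_buffer/2` below pushes back, in front of the remaining
+ # input, whatever part of the buffer was not consumed by the matched named
+ # reference, so those characters are tokenized again. For example, with a
+ # buffer of "&notit;" and a candidate of "&not", the leftover "it;" is
+ # prepended to `html` while the entity's characters are emitted for "&not".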
+ defp charref_html_after_buffer(html, %State{ + charref_state: %CharrefState{candidate: candidate}, + buffer: buffer + }) + when is_binary(buffer) and is_binary(candidate) do + String.replace_prefix(buffer, candidate, "") <> html + end + + defp charref_html_after_buffer( + html, + s = %State{ + charref_state: %CharrefState{candidate: candidate} + } + ) + when is_binary(candidate) do + String.replace_prefix(IO.chardata_to_string(s.buffer), candidate, "") <> html + end + + defp charref_html_after_buffer(html, _), do: html + + # § tokenizer-numeric-character-reference-state + + defp numeric_character_reference(html, s) do + do_numeric_character_reference(html, %{s | charref_code: 0}) + end + + defp do_numeric_character_reference(<>, s) + when c in [?x, ?X] do + hexadecimal_character_reference_start(html, %{s | buffer: [s.buffer | [c]]}) + end + + defp do_numeric_character_reference(html, s) do + decimal_character_reference_start(html, s) + end + + # § tokenizer-hexadecimal-character-reference-start-state + + defp hexadecimal_character_reference_start(html = <>, s) + when is_letter(c) or is_digit(c) do + hexadecimal_character_reference(html, s) + end + + defp hexadecimal_character_reference_start(html, s) do + # set parse error + + character_reference_end(html, s) + end + + # § tokenizer-decimal-character-reference-start-state + + defp decimal_character_reference_start(html = <>, s) when is_digit(c) do + decimal_character_reference(html, s) + end + + defp decimal_character_reference_start(html, s) do + # set parse error + character_reference_end(html, s) + end + + # § tokenizer-hexadecimal-character-reference-state + + defp hexadecimal_character_reference(<>, s) when is_digit(c) do + hexadecimal_character_reference(html, %{s | charref_code: s.charref_code * 16 + c - 0x30}) + end + + defp hexadecimal_character_reference(<>, s) when c in ?A..?F do + hexadecimal_character_reference(html, %{s | charref_code: s.charref_code * 16 + c - 0x37}) + end + + defp hexadecimal_character_reference(<>, s) when c in ?a..?f do + hexadecimal_character_reference(html, %{s | charref_code: s.charref_code * 16 + c - 0x57}) + end + + defp hexadecimal_character_reference(<>, s) do + numeric_character_reference_end(html, s) + end + + defp hexadecimal_character_reference(html, s) do + # set parse error + numeric_character_reference_end(html, s) + end + + # § tokenizer-decimal-character-reference-state + + defp decimal_character_reference(<>, s) when is_digit(c) do + decimal_character_reference(html, %{s | charref_code: s.charref_code * 10 + c - 0x30}) + end + + defp decimal_character_reference(<>, s) do + numeric_character_reference_end(html, s) + end + + defp decimal_character_reference(html, s) do + # set parse error + + numeric_character_reference_end(html, s) + end + + # § tokenizer-decimal-character-reference-state + + defp numeric_character_reference_end(html, s) do + # set parse errors + {:ok, {_, numeric_char}} = Floki.HTML.NumericCharref.to_unicode_number(s.charref_code) + + character_reference_end(html, %{s | buffer: [numeric_char]}) + end + + # § tokenizer-character-reference-end-state + + @spec character_reference_end(binary(), State.t()) :: State.t() + defp character_reference_end(html, s) do + state = + if part_of_attr?(s) do + [attr | attrs] = s.token.attributes + new_attr = %Attribute{attr | value: [attr.value | s.buffer]} + new_tag = %StartTag{s.token | attributes: [new_attr | attrs]} + + %{s | token: new_tag} + else + %{s | tokens: append_char_token(s, s.buffer)} + end + + case state.return_state do + 
:data -> + data(html, state) + + :rcdata -> + rcdata(html, state) + + :attribute_value_unquoted -> + attribute_value_unquoted(html, state) + + :attribute_value_single_quoted -> + attribute_value_single_quoted(html, state) + + :attribute_value_double_quoted -> + attribute_value_double_quoted(html, state) + end + end + + defp part_of_attr?(state) do + state.return_state in [ + :attribute_value_double_quoted, + :attribute_value_single_quoted, + :attribute_value_unquoted + ] + end + + defp append_char_token(state, char) do + case state.tokens do + [{:char, data} | rest] -> + if is_binary(char) do + [state.emit.({:char, [data | char]}) | rest] + else + [state.emit.({:char, [data | [char]]}) | rest] + end + + other_tokens -> + if is_list(char) || is_binary(char) do + [state.emit.({:char, char}) | other_tokens] + else + [state.emit.({:char, [char]}) | other_tokens] + end + end + end + + defp appropriate_tag?(state) do + with %StartTag{name: start_tag_name} <- state.last_start_tag, + %EndTag{name: end_tag_name} <- state.token do + IO.chardata_to_string(start_tag_name) == IO.chardata_to_string(end_tag_name) + else + _ -> false + end + end + + defp tokens_for_inappropriate_end_tag(state) do + [ + state.emit.({:char, state.buffer}), + state.emit.({:char, [@solidus]}), + state.emit.({:char, [@less_than_sign]}) | state.tokens + ] + end +end diff --git a/lib/floki/html_parser.ex b/lib/floki/html_parser.ex index 8963d56a..55f2e1b3 100644 --- a/lib/floki/html_parser.ex +++ b/lib/floki/html_parser.ex @@ -3,8 +3,8 @@ defmodule Floki.HTMLParser do A entry point to dynamic dispatch functions to the configured HTML parser. - The configuration can be done with the "html_parser" - option when calling the functions, or for the "floki" application: + The configuration can be done with the `:html_parser` + option when calling the functions, or for the `:floki` application: Floki.parse_document(document, html_parser: Floki.HTMLParser.FastHtml) @@ -33,6 +33,6 @@ defmodule Floki.HTMLParser do end defp parser(opts) do - Keyword.get(opts, :html_parser) || Application.get_env(:floki, :html_parser, @default_parser) + opts[:html_parser] || Application.get_env(:floki, :html_parser, @default_parser) end end diff --git a/lib/floki/html_tree/comment.ex b/lib/floki/html_tree/comment.ex index 24b21740..42e2e107 100644 --- a/lib/floki/html_tree/comment.ex +++ b/lib/floki/html_tree/comment.ex @@ -2,6 +2,7 @@ defmodule Floki.HTMLTree.Comment do @moduledoc false # Represents a comment inside an HTML tree with reference to its parent node id. + # TODO: rename content to data defstruct content: "", node_id: nil, parent_node_id: nil @type t :: %__MODULE__{ diff --git a/lib/floki/selector.ex b/lib/floki/selector.ex index a3ed0cc5..0bd9476e 100644 --- a/lib/floki/selector.ex +++ b/lib/floki/selector.ex @@ -210,6 +210,10 @@ defmodule Floki.Selector do PseudoClass.match_contains?(tree, html_node, pseudo_class) end + defp pseudo_class_match?(html_node, %{name: "root"}, tree) do + PseudoClass.match_root?(html_node, tree) + end + defp pseudo_class_match?(_html_node, %{name: unknown_pseudo_class}, _tree) do Logger.info(fn -> "Pseudo-class #{inspect(unknown_pseudo_class)} is not implemented. Ignoring." 
diff --git a/lib/floki/selector/attribute_selector.ex b/lib/floki/selector/attribute_selector.ex index 6174025c..4abbf8da 100644 --- a/lib/floki/selector/attribute_selector.ex +++ b/lib/floki/selector/attribute_selector.ex @@ -6,17 +6,18 @@ defmodule Floki.Selector.AttributeSelector do alias Floki.Selector.AttributeSelector - defstruct match_type: nil, attribute: nil, value: nil + defstruct match_type: nil, attribute: nil, value: nil, flag: nil @type t :: %__MODULE__{ match_type: :atom | nil, attribute: String.t(), - value: String.t() | nil + value: String.t() | nil, + flag: String.t() | nil } defimpl String.Chars do def to_string(selector) do - "[#{selector.attribute}#{type(selector.match_type)}'#{selector.value}']" + "[#{selector.attribute}#{type(selector.match_type)}'#{selector.value}'#{flag(selector.flag)}]" end defp type(match_type) do @@ -30,6 +31,9 @@ defmodule Floki.Selector.AttributeSelector do _ -> "" end end + + defp flag(nil), do: "" + defp flag(flag), do: " " <> flag end # Returns if attributes of a node matches with a given attribute selector. @@ -37,6 +41,51 @@ defmodule Floki.Selector.AttributeSelector do attribute_present?(s.attribute, attributes) end + # Case-insensitive matches + + def match?(attributes, s = %AttributeSelector{match_type: :equal, flag: "i"}) do + String.downcase(get_value(s.attribute, attributes)) == String.downcase(s.value) + end + + def match?(attributes, s = %AttributeSelector{match_type: :includes, flag: "i"}) do + selector_value = String.downcase(s.value) + value = String.downcase(get_value(s.attribute, attributes)) + + whitespace_values = String.split(value, ~r/\s+/) + + Enum.any?(whitespace_values, fn v -> v == selector_value end) + end + + def match?(attributes, s = %AttributeSelector{match_type: :dash_match, flag: "i"}) do + selector_value = String.downcase(s.value) + value = String.downcase(get_value(s.attribute, attributes)) + + value == selector_value || String.starts_with?(value, "#{selector_value}-") + end + + def match?(attributes, s = %AttributeSelector{match_type: :prefix_match, flag: "i"}) do + s.attribute + |> get_value(attributes) + |> String.downcase() + |> String.starts_with?(String.downcase(s.value)) + end + + def match?(attributes, s = %AttributeSelector{match_type: :sufix_match, flag: "i"}) do + s.attribute + |> get_value(attributes) + |> String.downcase() + |> String.ends_with?(String.downcase(s.value)) + end + + def match?(attributes, s = %AttributeSelector{match_type: :substring_match, flag: "i"}) do + s.attribute + |> get_value(attributes) + |> String.downcase() + |> String.contains?(String.downcase(s.value)) + end + + # Case-sensitive matches + def match?(attributes, s = %AttributeSelector{match_type: :equal}) do get_value(s.attribute, attributes) == s.value end diff --git a/lib/floki/selector/parser.ex b/lib/floki/selector/parser.ex index 453c6f64..ffd038ec 100644 --- a/lib/floki/selector/parser.ex +++ b/lib/floki/selector/parser.ex @@ -54,7 +54,7 @@ defmodule Floki.Selector.Parser do end defp do_parse([{:hash, _, id} | t], selector) do - do_parse(t, %{selector | id: to_string(id)}) + do_parse(t, %{selector | id: to_string(id) |> String.replace("\\.", ".")}) end defp do_parse([{:class, _, class} | t], selector) do @@ -168,6 +168,12 @@ defmodule Floki.Selector.Parser do consume_attribute(:consuming, t, new_selector) end + defp consume_attribute(:consuming, [{:attribute_identifier, _, value} | t], attr_selector) do + flag = String.at(to_string(value), -2) + new_selector = %{attr_selector | flag: flag} + 
consume_attribute(:done, t, new_selector) + end + defp consume_attribute(:consuming, [{']', _} | t], attr_selector) do consume_attribute(:done, t, attr_selector) end diff --git a/lib/floki/selector/pseudo_class.ex b/lib/floki/selector/pseudo_class.ex index 18c0fd8c..ed48b5c9 100644 --- a/lib/floki/selector/pseudo_class.ex +++ b/lib/floki/selector/pseudo_class.ex @@ -144,6 +144,10 @@ defmodule Floki.Selector.PseudoClass do false end + def match_root?(html_node, tree) do + html_node.node_id in tree.root_nodes_ids + end + defp node_position(ids, %HTMLNode{node_id: node_id}) do {_node_id, position} = Enum.find(ids, fn {id, _} -> id == node_id end) diff --git a/lib/mix/tasks/generate_entities.ex b/lib/mix/tasks/generate_entities.ex new file mode 100644 index 00000000..32dc6989 --- /dev/null +++ b/lib/mix/tasks/generate_entities.ex @@ -0,0 +1,57 @@ +defmodule Mix.Tasks.GenerateEntities do + @shortdoc "Generate the entities module" + + @json_entities_path "priv/entities.json" + @destination_module_path "lib/floki/entities.ex" + + use Mix.Task + + @impl Mix.Task + def run(_) do + Mix.shell().info("Generating entities..") + + {:ok, content} = File.read(@json_entities_path) + {:ok, json} = Jason.decode(content) + + headers = """ + defmodule Floki.Entities do + # This file was generated by "Mix.Tasks.GenerateEntities" + + @moduledoc false + + @doc \"\"\" + Returns unicode codepoints for a given HTML entity. + \"\"\" + @spec get(binary()) :: list(integer) + """ + + fun_template = fn {key, value} -> + [ + "def get(", + inspect(key), + "), do: ", + inspect(Map.fetch!(value, "codepoints"), charlists: :as_lists), + "\n" + ] + end + + functions = + json + |> Enum.sort_by(fn {key, _value} -> key end) + |> Enum.map(fun_template) + + bottom = """ + def get(_), do: [] + end + """ + + contents = + [headers, functions, bottom] + |> IO.iodata_to_binary() + |> Code.format_string!() + + File.write!(@destination_module_path, contents) + + Mix.shell().info("Entities module is located in lib/floki/entities.ex") + end +end diff --git a/lib/mix/tasks/generate_tokenizer_tests.ex b/lib/mix/tasks/generate_tokenizer_tests.ex new file mode 100644 index 00000000..3ec263f7 --- /dev/null +++ b/lib/mix/tasks/generate_tokenizer_tests.ex @@ -0,0 +1,79 @@ +defmodule Mix.Tasks.GenerateTokenizerTests do + @moduledoc """ + It generates tests based on test files from WHATWG. + + This task will take a look at tokenizer test files + that are located in "./test/html5lib-tests/tokenizer" + and generate modules to run those tests. + + This is necessary every time the specs of HTML change, + so we can keep up to date and also we can keep track + of what changed. + """ + + @shortdoc "Generate tokenizer tests based on specs." 
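+
+ # Typical invocation, assuming the html5lib-tests submodule is checked out
+ # (the argument is the name of a .test file under test/html5lib-tests/tokenizer,
+ # e.g. "test1.test"):
+ #
+ #     mix generate_tokenizer_tests test1.test
+ #
+ # The generated modules are written to test/floki/html/generated/tokenizer.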
+ + @base_path "test/html5lib-tests/tokenizer" + @html5lib_revision_path ".git/modules/test/html5lib-tests/HEAD" + @template_path "priv/tokenizer_test_template.ex.eex" + @destination_path "test/floki/html/generated/tokenizer" + + use Mix.Task + + @impl Mix.Task + def run([filename | _]) do + Mix.shell().info("generating #{filename}...") + + {:ok, content} = File.read(Path.join([@base_path, filename])) + {:ok, json} = Jason.decode(content) + + identity_fun = fn %{"description" => desc} -> desc end + revision = File.read!(@html5lib_revision_path) + + tests = + Map.fetch!(json, "tests") + |> Enum.filter(fn %{"description" => desc} -> + is_binary(desc) && desc != "" + end) + |> Enum.uniq_by(identity_fun) + |> Enum.sort_by(identity_fun) + + basename = String.split(filename, ".") |> List.first() + + if length(tests) <= 100 do + save_tests(basename, filename, tests, revision) + else + tests + |> Enum.chunk_every(100) + |> Enum.with_index(1) + |> Enum.each(fn {tests_group, idx} -> + save_tests(basename <> "_part#{idx}", filename, tests_group, revision) + end) + end + end + + defp save_tests(basename, filename, tests, revision) do + test_name = + basename + |> String.split("_") + |> Enum.map_join(&String.capitalize(&1)) + + destination_path = Path.join([@destination_path, basename <> "_test.exs"]) + + contents = + @template_path + |> EEx.eval_file( + tests: tests, + test_name: test_name, + test_file: filename, + revision: revision + ) + |> Code.format_string!() + + Mix.shell().info(contents) + + File.write!(destination_path, contents) + + Mix.shell().info("saved in #{destination_path}.") + end +end diff --git a/mix.exs b/mix.exs index e3177880..7d2b2d0f 100644 --- a/mix.exs +++ b/mix.exs @@ -3,7 +3,7 @@ defmodule Floki.Mixfile do @description "Floki is a simple HTML parser that enables search for nodes using CSS selectors." @source_url "https://github.com/philss/floki" - @version "0.31.0" + @version "0.32.0" def project do [ @@ -19,7 +19,8 @@ defmodule Floki.Mixfile do docs: docs(), dialyzer: [ plt_file: {:no_warn, "priv/plts/dialyzer.plt"} - ] + ], + elixirc_paths: elixirc_paths(Mix.env()) ] end @@ -56,12 +57,12 @@ defmodule Floki.Mixfile do [ {:html_entities, "~> 0.5.0"}, + {:jason, "~> 1.1", only: [:dev, :test, :docs]}, {:earmark, "~> 1.2", only: :dev}, - {:ex_doc, "~> 0.24.1", only: :dev, runtime: false}, + {:ex_doc, "~> 0.25.0", only: :dev, runtime: false}, {:benchee, "~> 1.0.1", only: :dev}, {:credo, ">= 0.0.0", only: [:dev, :test]}, - {:dialyxir, "~> 1.0", only: [:dev], runtime: false}, - {:inch_ex, "~> 2.1.0-rc.1", only: :docs} + {:dialyxir, "~> 1.0", only: [:dev], runtime: false} ] ++ parsers end @@ -104,7 +105,9 @@ defmodule Floki.Mixfile do maintainers: ["Philip Sampaio Silva"], licenses: ["MIT"], files: [ - "lib", + # We don't want to ship mix tasks. + "lib/floki", + "lib/floki.ex", "src/*.xrl", "src/floki_mochi_html.erl", "src/floki.gleam", @@ -122,4 +125,8 @@ defmodule Floki.Mixfile do } } end + + # Specifies which paths to compile per environment. 
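+ # In :test we also compile "test/support", where shared test helpers can live;
+ # every other environment compiles only "lib".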
+ defp elixirc_paths(:test), do: ["test/support", "lib"] + defp elixirc_paths(_), do: ["lib"] end diff --git a/mix.lock b/mix.lock index 4ac82db9..0f84d274 100644 --- a/mix.lock +++ b/mix.lock @@ -4,22 +4,21 @@ "credo": {:hex, :credo, "1.5.6", "e04cc0fdc236fefbb578e0c04bd01a471081616e741d386909e527ac146016c6", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "4b52a3e558bd64e30de62a648518a5ea2b6e3e5d2b164ef5296244753fc7eb17"}, "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, "dialyxir": {:hex, :dialyxir, "1.1.0", "c5aab0d6e71e5522e77beff7ba9e08f8e02bad90dfbeffae60eaf0cb47e29488", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "07ea8e49c45f15264ebe6d5b93799d4dd56a44036cf42d0ad9c960bc266c0b9a"}, - "earmark": {:hex, :earmark, "1.4.15", "2c7f924bf495ec1f65bd144b355d0949a05a254d0ec561740308a54946a67888", [:mix], [{:earmark_parser, ">= 1.4.13", [hex: :earmark_parser, repo: "hexpm", optional: false]}], "hexpm", "3b1209b85bc9f3586f370f7c363f6533788fb4e51db23aa79565875e7f9999ee"}, - "earmark_parser": {:hex, :earmark_parser, "1.4.13", "0c98163e7d04a15feb62000e1a891489feb29f3d10cb57d4f845c405852bbef8", [:mix], [], "hexpm", "d602c26af3a0af43d2f2645613f65841657ad6efc9f0e361c3b6c06b578214ba"}, + "earmark": {:hex, :earmark, "1.4.16", "2188754e590a3c379fdd2783bb44eedd8c54968fa0256b6f336f6d56b089d793", [:mix], [{:earmark_parser, ">= 1.4.16", [hex: :earmark_parser, repo: "hexpm", optional: false]}], "hexpm", "46f853f7ae10bee06923430dca522ba9dcbdc6b7a9729748e8dd5344d21b8418"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.16", "607709303e1d4e3e02f1444df0c821529af1c03b8578dfc81bb9cf64553d02b9", [:mix], [], "hexpm", "69fcf696168f5a274dd012e3e305027010658b2d1630cef68421d6baaeaccead"}, "elixir_make": {:hex, :elixir_make, "0.6.2", "7dffacd77dec4c37b39af867cedaabb0b59f6a871f89722c25b28fcd4bd70530", [:mix], [], "hexpm", "03e49eadda22526a7e5279d53321d1cced6552f344ba4e03e619063de75348d9"}, "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, - "ex_doc": {:hex, :ex_doc, "0.24.2", "e4c26603830c1a2286dae45f4412a4d1980e1e89dc779fcd0181ed1d5a05c8d9", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "e134e1d9e821b8d9e4244687fb2ace58d479b67b282de5158333b0d57c6fb7da"}, + "ex_doc": {:hex, :ex_doc, "0.25.3", "3edf6a0d70a39d2eafde030b8895501b1c93692effcbd21347296c18e47618ce", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "9ebebc2169ec732a38e9e779fd0418c9189b3ca93f4a676c961be6c1527913f5"}, "fast_html": {:hex, :fast_html, "2.0.4", "4910ee49f2f6b19692e3bf30bf97f1b6b7dac489cd6b0f34cd0fe3042c56ba30", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", 
optional: false]}, {:nimble_pool, "~> 0.1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}], "hexpm", "3bb49d541dfc02ad5e425904f53376d758c09f89e521afc7d2b174b3227761ea"}, "file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, - "html5ever": {:hex, :html5ever, "0.8.0", "2114c27c28dbb0fc6a8e3936cfb7ca3ea12edc722f865cde4702ba9da4ed3f1c", [:mix], [{:rustler, "~> 0.21.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm", "4c50ac60a9dca114c359f657aca063cbc2970cdd572aa5e202b1368c2ecdb45a"}, + "html5ever": {:hex, :html5ever, "0.9.0", "8f51ff9222a95494f33477c44fd2a9132d1dba82bb452a04343a14e7e9920f1d", [:mix], [{:rustler, "~> 0.22.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm", "f3ca7ab856a75262a82acb4a49fc1e1078979efa2e33786ebade1860f9238e2d"}, "html_entities": {:hex, :html_entities, "0.5.2", "9e47e70598da7de2a9ff6af8758399251db6dbb7eebe2b013f2bbd2515895c3c", [:mix], [], "hexpm", "c53ba390403485615623b9531e97696f076ed415e8d8058b1dbaa28181f4fdcc"}, - "inch_ex": {:hex, :inch_ex, "2.1.0-rc.1", "7642a8902c0d2ed5d9b5754b2fc88fedf630500d630fc03db7caca2e92dedb36", [:mix], [{:bunt, "~> 0.2", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "4ceee988760f9382d1c1d0b93ea5875727f6071693e89a0a3c49c456ef1be75d"}, "jason": {:hex, :jason, "1.2.2", "ba43e3f2709fd1aa1dce90aaabfd039d000469c05c56f0b8e31978e03fa39052", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "18a228f5f0058ee183f29f9eae0805c6e59d61c3b006760668d8d18ff0d12179"}, "makeup": {:hex, :makeup, "1.0.5", "d5a830bc42c9800ce07dd97fa94669dfb93d3bf5fcf6ea7a0c67b2e0e4a7f26c", [:mix], [{:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cfa158c02d3f5c0c665d0af11512fed3fba0144cf1aadee0f2ce17747fba2ca9"}, "makeup_elixir": {:hex, :makeup_elixir, "0.15.1", "b5888c880d17d1cc3e598f05cdb5b5a91b7b17ac4eaf5f297cb697663a1094dd", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "db68c173234b07ab2a07f645a5acdc117b9f99d69ebf521821d89690ae6c6ec8"}, "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, "nimble_parsec": {:hex, :nimble_parsec, "1.1.0", "3a6fca1550363552e54c216debb6a9e95bd8d32348938e13de5eda962c0d7f89", [:mix], [], "hexpm", "08eb32d66b706e913ff748f11694b17981c0b04a33ef470e33e11b3d3ac8f54b"}, "nimble_pool": {:hex, :nimble_pool, "0.1.0", "ffa9d5be27eee2b00b0c634eb649aa27f97b39186fec3c493716c2a33e784ec6", [:mix], [], "hexpm", "343a1eaa620ddcf3430a83f39f2af499fe2370390d4f785cd475b4df5acaf3f9"}, - "rustler": {:hex, :rustler, "0.21.1", "5299980be32da997c54382e945bacaa015ed97a60745e1e639beaf6a7b278c65", [:mix], [{:toml, "~> 0.5.2", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6ee1651e10645b2b2f3bb70502bf180341aa058709177e9bc28c105934094bc6"}, + "rustler": {:hex, :rustler, "0.22.1", "0bc4806f8352a4aeb7f209a41cddca8454d8627bb11704b083371f794a1bfd00", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.5.2", [hex: :toml, repo: 
"hexpm", optional: false]}], "hexpm", "ebb7a65b616e631c40309990b13bd29e91876a25c6fe8e0afcf1a3a0eab22bf1"}, "toml": {:hex, :toml, "0.5.2", "e471388a8726d1ce51a6b32f864b8228a1eb8edc907a0edf2bb50eab9321b526", [:mix], [], "hexpm", "f1e3dabef71fb510d015fad18c0e05e7c57281001141504c6b69d94e99750a07"}, } diff --git a/priv/entities.json b/priv/entities.json new file mode 100644 index 00000000..8a1f590a --- /dev/null +++ b/priv/entities.json @@ -0,0 +1,2233 @@ +{ + "Á": { "codepoints": [193], "characters": "\u00C1" }, + "Á": { "codepoints": [193], "characters": "\u00C1" }, + "á": { "codepoints": [225], "characters": "\u00E1" }, + "á": { "codepoints": [225], "characters": "\u00E1" }, + "Ă": { "codepoints": [258], "characters": "\u0102" }, + "ă": { "codepoints": [259], "characters": "\u0103" }, + "∾": { "codepoints": [8766], "characters": "\u223E" }, + "∿": { "codepoints": [8767], "characters": "\u223F" }, + "∾̳": { "codepoints": [8766, 819], "characters": "\u223E\u0333" }, + "Â": { "codepoints": [194], "characters": "\u00C2" }, + "Â": { "codepoints": [194], "characters": "\u00C2" }, + "â": { "codepoints": [226], "characters": "\u00E2" }, + "â": { "codepoints": [226], "characters": "\u00E2" }, + "´": { "codepoints": [180], "characters": "\u00B4" }, + "´": { "codepoints": [180], "characters": "\u00B4" }, + "А": { "codepoints": [1040], "characters": "\u0410" }, + "а": { "codepoints": [1072], "characters": "\u0430" }, + "Æ": { "codepoints": [198], "characters": "\u00C6" }, + "Æ": { "codepoints": [198], "characters": "\u00C6" }, + "æ": { "codepoints": [230], "characters": "\u00E6" }, + "æ": { "codepoints": [230], "characters": "\u00E6" }, + "⁡": { "codepoints": [8289], "characters": "\u2061" }, + "𝔄": { "codepoints": [120068], "characters": "\uD835\uDD04" }, + "𝔞": { "codepoints": [120094], "characters": "\uD835\uDD1E" }, + "À": { "codepoints": [192], "characters": "\u00C0" }, + "À": { "codepoints": [192], "characters": "\u00C0" }, + "à": { "codepoints": [224], "characters": "\u00E0" }, + "à": { "codepoints": [224], "characters": "\u00E0" }, + "ℵ": { "codepoints": [8501], "characters": "\u2135" }, + "ℵ": { "codepoints": [8501], "characters": "\u2135" }, + "Α": { "codepoints": [913], "characters": "\u0391" }, + "α": { "codepoints": [945], "characters": "\u03B1" }, + "Ā": { "codepoints": [256], "characters": "\u0100" }, + "ā": { "codepoints": [257], "characters": "\u0101" }, + "⨿": { "codepoints": [10815], "characters": "\u2A3F" }, + "&": { "codepoints": [38], "characters": "\u0026" }, + "&": { "codepoints": [38], "characters": "\u0026" }, + "&": { "codepoints": [38], "characters": "\u0026" }, + "&": { "codepoints": [38], "characters": "\u0026" }, + "⩕": { "codepoints": [10837], "characters": "\u2A55" }, + "⩓": { "codepoints": [10835], "characters": "\u2A53" }, + "∧": { "codepoints": [8743], "characters": "\u2227" }, + "⩜": { "codepoints": [10844], "characters": "\u2A5C" }, + "⩘": { "codepoints": [10840], "characters": "\u2A58" }, + "⩚": { "codepoints": [10842], "characters": "\u2A5A" }, + "∠": { "codepoints": [8736], "characters": "\u2220" }, + "⦤": { "codepoints": [10660], "characters": "\u29A4" }, + "∠": { "codepoints": [8736], "characters": "\u2220" }, + "⦨": { "codepoints": [10664], "characters": "\u29A8" }, + "⦩": { "codepoints": [10665], "characters": "\u29A9" }, + "⦪": { "codepoints": [10666], "characters": "\u29AA" }, + "⦫": { "codepoints": [10667], "characters": "\u29AB" }, + "⦬": { "codepoints": [10668], "characters": "\u29AC" }, + "⦭": { "codepoints": [10669], "characters": "\u29AD" }, + "⦮": { 
"codepoints": [10670], "characters": "\u29AE" }, + "⦯": { "codepoints": [10671], "characters": "\u29AF" }, + "∡": { "codepoints": [8737], "characters": "\u2221" }, + "∟": { "codepoints": [8735], "characters": "\u221F" }, + "⊾": { "codepoints": [8894], "characters": "\u22BE" }, + "⦝": { "codepoints": [10653], "characters": "\u299D" }, + "∢": { "codepoints": [8738], "characters": "\u2222" }, + "Å": { "codepoints": [197], "characters": "\u00C5" }, + "⍼": { "codepoints": [9084], "characters": "\u237C" }, + "Ą": { "codepoints": [260], "characters": "\u0104" }, + "ą": { "codepoints": [261], "characters": "\u0105" }, + "𝔸": { "codepoints": [120120], "characters": "\uD835\uDD38" }, + "𝕒": { "codepoints": [120146], "characters": "\uD835\uDD52" }, + "⩯": { "codepoints": [10863], "characters": "\u2A6F" }, + "≈": { "codepoints": [8776], "characters": "\u2248" }, + "⩰": { "codepoints": [10864], "characters": "\u2A70" }, + "≊": { "codepoints": [8778], "characters": "\u224A" }, + "≋": { "codepoints": [8779], "characters": "\u224B" }, + "'": { "codepoints": [39], "characters": "\u0027" }, + "⁡": { "codepoints": [8289], "characters": "\u2061" }, + "≈": { "codepoints": [8776], "characters": "\u2248" }, + "≊": { "codepoints": [8778], "characters": "\u224A" }, + "Å": { "codepoints": [197], "characters": "\u00C5" }, + "Å": { "codepoints": [197], "characters": "\u00C5" }, + "å": { "codepoints": [229], "characters": "\u00E5" }, + "å": { "codepoints": [229], "characters": "\u00E5" }, + "𝒜": { "codepoints": [119964], "characters": "\uD835\uDC9C" }, + "𝒶": { "codepoints": [119990], "characters": "\uD835\uDCB6" }, + "≔": { "codepoints": [8788], "characters": "\u2254" }, + "*": { "codepoints": [42], "characters": "\u002A" }, + "≈": { "codepoints": [8776], "characters": "\u2248" }, + "≍": { "codepoints": [8781], "characters": "\u224D" }, + "Ã": { "codepoints": [195], "characters": "\u00C3" }, + "Ã": { "codepoints": [195], "characters": "\u00C3" }, + "ã": { "codepoints": [227], "characters": "\u00E3" }, + "ã": { "codepoints": [227], "characters": "\u00E3" }, + "Ä": { "codepoints": [196], "characters": "\u00C4" }, + "Ä": { "codepoints": [196], "characters": "\u00C4" }, + "ä": { "codepoints": [228], "characters": "\u00E4" }, + "ä": { "codepoints": [228], "characters": "\u00E4" }, + "∳": { "codepoints": [8755], "characters": "\u2233" }, + "⨑": { "codepoints": [10769], "characters": "\u2A11" }, + "≌": { "codepoints": [8780], "characters": "\u224C" }, + "϶": { "codepoints": [1014], "characters": "\u03F6" }, + "‵": { "codepoints": [8245], "characters": "\u2035" }, + "∽": { "codepoints": [8765], "characters": "\u223D" }, + "⋍": { "codepoints": [8909], "characters": "\u22CD" }, + "∖": { "codepoints": [8726], "characters": "\u2216" }, + "⫧": { "codepoints": [10983], "characters": "\u2AE7" }, + "⊽": { "codepoints": [8893], "characters": "\u22BD" }, + "⌅": { "codepoints": [8965], "characters": "\u2305" }, + "⌆": { "codepoints": [8966], "characters": "\u2306" }, + "⌅": { "codepoints": [8965], "characters": "\u2305" }, + "⎵": { "codepoints": [9141], "characters": "\u23B5" }, + "⎶": { "codepoints": [9142], "characters": "\u23B6" }, + "≌": { "codepoints": [8780], "characters": "\u224C" }, + "Б": { "codepoints": [1041], "characters": "\u0411" }, + "б": { "codepoints": [1073], "characters": "\u0431" }, + "„": { "codepoints": [8222], "characters": "\u201E" }, + "∵": { "codepoints": [8757], "characters": "\u2235" }, + "∵": { "codepoints": [8757], "characters": "\u2235" }, + "∵": { "codepoints": [8757], "characters": "\u2235" }, + "⦰": { 
"codepoints": [10672], "characters": "\u29B0" }, + "϶": { "codepoints": [1014], "characters": "\u03F6" }, + "ℬ": { "codepoints": [8492], "characters": "\u212C" }, + "ℬ": { "codepoints": [8492], "characters": "\u212C" }, + "Β": { "codepoints": [914], "characters": "\u0392" }, + "β": { "codepoints": [946], "characters": "\u03B2" }, + "ℶ": { "codepoints": [8502], "characters": "\u2136" }, + "≬": { "codepoints": [8812], "characters": "\u226C" }, + "𝔅": { "codepoints": [120069], "characters": "\uD835\uDD05" }, + "𝔟": { "codepoints": [120095], "characters": "\uD835\uDD1F" }, + "⋂": { "codepoints": [8898], "characters": "\u22C2" }, + "◯": { "codepoints": [9711], "characters": "\u25EF" }, + "⋃": { "codepoints": [8899], "characters": "\u22C3" }, + "⨀": { "codepoints": [10752], "characters": "\u2A00" }, + "⨁": { "codepoints": [10753], "characters": "\u2A01" }, + "⨂": { "codepoints": [10754], "characters": "\u2A02" }, + "⨆": { "codepoints": [10758], "characters": "\u2A06" }, + "★": { "codepoints": [9733], "characters": "\u2605" }, + "▽": { "codepoints": [9661], "characters": "\u25BD" }, + "△": { "codepoints": [9651], "characters": "\u25B3" }, + "⨄": { "codepoints": [10756], "characters": "\u2A04" }, + "⋁": { "codepoints": [8897], "characters": "\u22C1" }, + "⋀": { "codepoints": [8896], "characters": "\u22C0" }, + "⤍": { "codepoints": [10509], "characters": "\u290D" }, + "⧫": { "codepoints": [10731], "characters": "\u29EB" }, + "▪": { "codepoints": [9642], "characters": "\u25AA" }, + "▴": { "codepoints": [9652], "characters": "\u25B4" }, + "▾": { "codepoints": [9662], "characters": "\u25BE" }, + "◂": { "codepoints": [9666], "characters": "\u25C2" }, + "▸": { "codepoints": [9656], "characters": "\u25B8" }, + "␣": { "codepoints": [9251], "characters": "\u2423" }, + "▒": { "codepoints": [9618], "characters": "\u2592" }, + "░": { "codepoints": [9617], "characters": "\u2591" }, + "▓": { "codepoints": [9619], "characters": "\u2593" }, + "█": { "codepoints": [9608], "characters": "\u2588" }, + "=⃥": { "codepoints": [61, 8421], "characters": "\u003D\u20E5" }, + "≡⃥": { "codepoints": [8801, 8421], "characters": "\u2261\u20E5" }, + "⫭": { "codepoints": [10989], "characters": "\u2AED" }, + "⌐": { "codepoints": [8976], "characters": "\u2310" }, + "𝔹": { "codepoints": [120121], "characters": "\uD835\uDD39" }, + "𝕓": { "codepoints": [120147], "characters": "\uD835\uDD53" }, + "⊥": { "codepoints": [8869], "characters": "\u22A5" }, + "⊥": { "codepoints": [8869], "characters": "\u22A5" }, + "⋈": { "codepoints": [8904], "characters": "\u22C8" }, + "⧉": { "codepoints": [10697], "characters": "\u29C9" }, + "┐": { "codepoints": [9488], "characters": "\u2510" }, + "╕": { "codepoints": [9557], "characters": "\u2555" }, + "╖": { "codepoints": [9558], "characters": "\u2556" }, + "╗": { "codepoints": [9559], "characters": "\u2557" }, + "┌": { "codepoints": [9484], "characters": "\u250C" }, + "╒": { "codepoints": [9554], "characters": "\u2552" }, + "╓": { "codepoints": [9555], "characters": "\u2553" }, + "╔": { "codepoints": [9556], "characters": "\u2554" }, + "─": { "codepoints": [9472], "characters": "\u2500" }, + "═": { "codepoints": [9552], "characters": "\u2550" }, + "┬": { "codepoints": [9516], "characters": "\u252C" }, + "╤": { "codepoints": [9572], "characters": "\u2564" }, + "╥": { "codepoints": [9573], "characters": "\u2565" }, + "╦": { "codepoints": [9574], "characters": "\u2566" }, + "┴": { "codepoints": [9524], "characters": "\u2534" }, + "╧": { "codepoints": [9575], "characters": "\u2567" }, + "╨": { "codepoints": 
[9576], "characters": "\u2568" }, + "╩": { "codepoints": [9577], "characters": "\u2569" }, + "⊟": { "codepoints": [8863], "characters": "\u229F" }, + "⊞": { "codepoints": [8862], "characters": "\u229E" }, + "⊠": { "codepoints": [8864], "characters": "\u22A0" }, + "┘": { "codepoints": [9496], "characters": "\u2518" }, + "╛": { "codepoints": [9563], "characters": "\u255B" }, + "╜": { "codepoints": [9564], "characters": "\u255C" }, + "╝": { "codepoints": [9565], "characters": "\u255D" }, + "└": { "codepoints": [9492], "characters": "\u2514" }, + "╘": { "codepoints": [9560], "characters": "\u2558" }, + "╙": { "codepoints": [9561], "characters": "\u2559" }, + "╚": { "codepoints": [9562], "characters": "\u255A" }, + "│": { "codepoints": [9474], "characters": "\u2502" }, + "║": { "codepoints": [9553], "characters": "\u2551" }, + "┼": { "codepoints": [9532], "characters": "\u253C" }, + "╪": { "codepoints": [9578], "characters": "\u256A" }, + "╫": { "codepoints": [9579], "characters": "\u256B" }, + "╬": { "codepoints": [9580], "characters": "\u256C" }, + "┤": { "codepoints": [9508], "characters": "\u2524" }, + "╡": { "codepoints": [9569], "characters": "\u2561" }, + "╢": { "codepoints": [9570], "characters": "\u2562" }, + "╣": { "codepoints": [9571], "characters": "\u2563" }, + "├": { "codepoints": [9500], "characters": "\u251C" }, + "╞": { "codepoints": [9566], "characters": "\u255E" }, + "╟": { "codepoints": [9567], "characters": "\u255F" }, + "╠": { "codepoints": [9568], "characters": "\u2560" }, + "‵": { "codepoints": [8245], "characters": "\u2035" }, + "˘": { "codepoints": [728], "characters": "\u02D8" }, + "˘": { "codepoints": [728], "characters": "\u02D8" }, + "¦": { "codepoints": [166], "characters": "\u00A6" }, + "¦": { "codepoints": [166], "characters": "\u00A6" }, + "𝒷": { "codepoints": [119991], "characters": "\uD835\uDCB7" }, + "ℬ": { "codepoints": [8492], "characters": "\u212C" }, + "⁏": { "codepoints": [8271], "characters": "\u204F" }, + "∽": { "codepoints": [8765], "characters": "\u223D" }, + "⋍": { "codepoints": [8909], "characters": "\u22CD" }, + "⧅": { "codepoints": [10693], "characters": "\u29C5" }, + "\": { "codepoints": [92], "characters": "\u005C" }, + "⟈": { "codepoints": [10184], "characters": "\u27C8" }, + "•": { "codepoints": [8226], "characters": "\u2022" }, + "•": { "codepoints": [8226], "characters": "\u2022" }, + "≎": { "codepoints": [8782], "characters": "\u224E" }, + "⪮": { "codepoints": [10926], "characters": "\u2AAE" }, + "≏": { "codepoints": [8783], "characters": "\u224F" }, + "≎": { "codepoints": [8782], "characters": "\u224E" }, + "≏": { "codepoints": [8783], "characters": "\u224F" }, + "Ć": { "codepoints": [262], "characters": "\u0106" }, + "ć": { "codepoints": [263], "characters": "\u0107" }, + "⩄": { "codepoints": [10820], "characters": "\u2A44" }, + "⩉": { "codepoints": [10825], "characters": "\u2A49" }, + "⩋": { "codepoints": [10827], "characters": "\u2A4B" }, + "∩": { "codepoints": [8745], "characters": "\u2229" }, + "⋒": { "codepoints": [8914], "characters": "\u22D2" }, + "⩇": { "codepoints": [10823], "characters": "\u2A47" }, + "⩀": { "codepoints": [10816], "characters": "\u2A40" }, + "ⅅ": { "codepoints": [8517], "characters": "\u2145" }, + "∩︀": { "codepoints": [8745, 65024], "characters": "\u2229\uFE00" }, + "⁁": { "codepoints": [8257], "characters": "\u2041" }, + "ˇ": { "codepoints": [711], "characters": "\u02C7" }, + "ℭ": { "codepoints": [8493], "characters": "\u212D" }, + "⩍": { "codepoints": [10829], "characters": "\u2A4D" }, + "Č": { 
"codepoints": [268], "characters": "\u010C" }, + "č": { "codepoints": [269], "characters": "\u010D" }, + "Ç": { "codepoints": [199], "characters": "\u00C7" }, + "Ç": { "codepoints": [199], "characters": "\u00C7" }, + "ç": { "codepoints": [231], "characters": "\u00E7" }, + "ç": { "codepoints": [231], "characters": "\u00E7" }, + "Ĉ": { "codepoints": [264], "characters": "\u0108" }, + "ĉ": { "codepoints": [265], "characters": "\u0109" }, + "∰": { "codepoints": [8752], "characters": "\u2230" }, + "⩌": { "codepoints": [10828], "characters": "\u2A4C" }, + "⩐": { "codepoints": [10832], "characters": "\u2A50" }, + "Ċ": { "codepoints": [266], "characters": "\u010A" }, + "ċ": { "codepoints": [267], "characters": "\u010B" }, + "¸": { "codepoints": [184], "characters": "\u00B8" }, + "¸": { "codepoints": [184], "characters": "\u00B8" }, + "¸": { "codepoints": [184], "characters": "\u00B8" }, + "⦲": { "codepoints": [10674], "characters": "\u29B2" }, + "¢": { "codepoints": [162], "characters": "\u00A2" }, + "¢": { "codepoints": [162], "characters": "\u00A2" }, + "·": { "codepoints": [183], "characters": "\u00B7" }, + "·": { "codepoints": [183], "characters": "\u00B7" }, + "𝔠": { "codepoints": [120096], "characters": "\uD835\uDD20" }, + "ℭ": { "codepoints": [8493], "characters": "\u212D" }, + "Ч": { "codepoints": [1063], "characters": "\u0427" }, + "ч": { "codepoints": [1095], "characters": "\u0447" }, + "✓": { "codepoints": [10003], "characters": "\u2713" }, + "✓": { "codepoints": [10003], "characters": "\u2713" }, + "Χ": { "codepoints": [935], "characters": "\u03A7" }, + "χ": { "codepoints": [967], "characters": "\u03C7" }, + "ˆ": { "codepoints": [710], "characters": "\u02C6" }, + "≗": { "codepoints": [8791], "characters": "\u2257" }, + "↺": { "codepoints": [8634], "characters": "\u21BA" }, + "↻": { "codepoints": [8635], "characters": "\u21BB" }, + "⊛": { "codepoints": [8859], "characters": "\u229B" }, + "⊚": { "codepoints": [8858], "characters": "\u229A" }, + "⊝": { "codepoints": [8861], "characters": "\u229D" }, + "⊙": { "codepoints": [8857], "characters": "\u2299" }, + "®": { "codepoints": [174], "characters": "\u00AE" }, + "Ⓢ": { "codepoints": [9416], "characters": "\u24C8" }, + "⊖": { "codepoints": [8854], "characters": "\u2296" }, + "⊕": { "codepoints": [8853], "characters": "\u2295" }, + "⊗": { "codepoints": [8855], "characters": "\u2297" }, + "○": { "codepoints": [9675], "characters": "\u25CB" }, + "⧃": { "codepoints": [10691], "characters": "\u29C3" }, + "≗": { "codepoints": [8791], "characters": "\u2257" }, + "⨐": { "codepoints": [10768], "characters": "\u2A10" }, + "⫯": { "codepoints": [10991], "characters": "\u2AEF" }, + "⧂": { "codepoints": [10690], "characters": "\u29C2" }, + "∲": { "codepoints": [8754], "characters": "\u2232" }, + "”": { "codepoints": [8221], "characters": "\u201D" }, + "’": { "codepoints": [8217], "characters": "\u2019" }, + "♣": { "codepoints": [9827], "characters": "\u2663" }, + "♣": { "codepoints": [9827], "characters": "\u2663" }, + ":": { "codepoints": [58], "characters": "\u003A" }, + "∷": { "codepoints": [8759], "characters": "\u2237" }, + "⩴": { "codepoints": [10868], "characters": "\u2A74" }, + "≔": { "codepoints": [8788], "characters": "\u2254" }, + "≔": { "codepoints": [8788], "characters": "\u2254" }, + ",": { "codepoints": [44], "characters": "\u002C" }, + "@": { "codepoints": [64], "characters": "\u0040" }, + "∁": { "codepoints": [8705], "characters": "\u2201" }, + "∘": { "codepoints": [8728], "characters": "\u2218" }, + "∁": { "codepoints": [8705], 
"characters": "\u2201" }, + "ℂ": { "codepoints": [8450], "characters": "\u2102" }, + "≅": { "codepoints": [8773], "characters": "\u2245" }, + "⩭": { "codepoints": [10861], "characters": "\u2A6D" }, + "≡": { "codepoints": [8801], "characters": "\u2261" }, + "∮": { "codepoints": [8750], "characters": "\u222E" }, + "∯": { "codepoints": [8751], "characters": "\u222F" }, + "∮": { "codepoints": [8750], "characters": "\u222E" }, + "𝕔": { "codepoints": [120148], "characters": "\uD835\uDD54" }, + "ℂ": { "codepoints": [8450], "characters": "\u2102" }, + "∐": { "codepoints": [8720], "characters": "\u2210" }, + "∐": { "codepoints": [8720], "characters": "\u2210" }, + "©": { "codepoints": [169], "characters": "\u00A9" }, + "©": { "codepoints": [169], "characters": "\u00A9" }, + "©": { "codepoints": [169], "characters": "\u00A9" }, + "©": { "codepoints": [169], "characters": "\u00A9" }, + "℗": { "codepoints": [8471], "characters": "\u2117" }, + "∳": { "codepoints": [8755], "characters": "\u2233" }, + "↵": { "codepoints": [8629], "characters": "\u21B5" }, + "✗": { "codepoints": [10007], "characters": "\u2717" }, + "⨯": { "codepoints": [10799], "characters": "\u2A2F" }, + "𝒞": { "codepoints": [119966], "characters": "\uD835\uDC9E" }, + "𝒸": { "codepoints": [119992], "characters": "\uD835\uDCB8" }, + "⫏": { "codepoints": [10959], "characters": "\u2ACF" }, + "⫑": { "codepoints": [10961], "characters": "\u2AD1" }, + "⫐": { "codepoints": [10960], "characters": "\u2AD0" }, + "⫒": { "codepoints": [10962], "characters": "\u2AD2" }, + "⋯": { "codepoints": [8943], "characters": "\u22EF" }, + "⤸": { "codepoints": [10552], "characters": "\u2938" }, + "⤵": { "codepoints": [10549], "characters": "\u2935" }, + "⋞": { "codepoints": [8926], "characters": "\u22DE" }, + "⋟": { "codepoints": [8927], "characters": "\u22DF" }, + "↶": { "codepoints": [8630], "characters": "\u21B6" }, + "⤽": { "codepoints": [10557], "characters": "\u293D" }, + "⩈": { "codepoints": [10824], "characters": "\u2A48" }, + "⩆": { "codepoints": [10822], "characters": "\u2A46" }, + "≍": { "codepoints": [8781], "characters": "\u224D" }, + "∪": { "codepoints": [8746], "characters": "\u222A" }, + "⋓": { "codepoints": [8915], "characters": "\u22D3" }, + "⩊": { "codepoints": [10826], "characters": "\u2A4A" }, + "⊍": { "codepoints": [8845], "characters": "\u228D" }, + "⩅": { "codepoints": [10821], "characters": "\u2A45" }, + "∪︀": { "codepoints": [8746, 65024], "characters": "\u222A\uFE00" }, + "↷": { "codepoints": [8631], "characters": "\u21B7" }, + "⤼": { "codepoints": [10556], "characters": "\u293C" }, + "⋞": { "codepoints": [8926], "characters": "\u22DE" }, + "⋟": { "codepoints": [8927], "characters": "\u22DF" }, + "⋎": { "codepoints": [8910], "characters": "\u22CE" }, + "⋏": { "codepoints": [8911], "characters": "\u22CF" }, + "¤": { "codepoints": [164], "characters": "\u00A4" }, + "¤": { "codepoints": [164], "characters": "\u00A4" }, + "↶": { "codepoints": [8630], "characters": "\u21B6" }, + "↷": { "codepoints": [8631], "characters": "\u21B7" }, + "⋎": { "codepoints": [8910], "characters": "\u22CE" }, + "⋏": { "codepoints": [8911], "characters": "\u22CF" }, + "∲": { "codepoints": [8754], "characters": "\u2232" }, + "∱": { "codepoints": [8753], "characters": "\u2231" }, + "⌭": { "codepoints": [9005], "characters": "\u232D" }, + "†": { "codepoints": [8224], "characters": "\u2020" }, + "‡": { "codepoints": [8225], "characters": "\u2021" }, + "ℸ": { "codepoints": [8504], "characters": "\u2138" }, + "↓": { "codepoints": [8595], "characters": "\u2193" }, + 
"↡": { "codepoints": [8609], "characters": "\u21A1" }, + "⇓": { "codepoints": [8659], "characters": "\u21D3" }, + "‐": { "codepoints": [8208], "characters": "\u2010" }, + "⫤": { "codepoints": [10980], "characters": "\u2AE4" }, + "⊣": { "codepoints": [8867], "characters": "\u22A3" }, + "⤏": { "codepoints": [10511], "characters": "\u290F" }, + "˝": { "codepoints": [733], "characters": "\u02DD" }, + "Ď": { "codepoints": [270], "characters": "\u010E" }, + "ď": { "codepoints": [271], "characters": "\u010F" }, + "Д": { "codepoints": [1044], "characters": "\u0414" }, + "д": { "codepoints": [1076], "characters": "\u0434" }, + "‡": { "codepoints": [8225], "characters": "\u2021" }, + "⇊": { "codepoints": [8650], "characters": "\u21CA" }, + "ⅅ": { "codepoints": [8517], "characters": "\u2145" }, + "ⅆ": { "codepoints": [8518], "characters": "\u2146" }, + "⤑": { "codepoints": [10513], "characters": "\u2911" }, + "⩷": { "codepoints": [10871], "characters": "\u2A77" }, + "°": { "codepoints": [176], "characters": "\u00B0" }, + "°": { "codepoints": [176], "characters": "\u00B0" }, + "∇": { "codepoints": [8711], "characters": "\u2207" }, + "Δ": { "codepoints": [916], "characters": "\u0394" }, + "δ": { "codepoints": [948], "characters": "\u03B4" }, + "⦱": { "codepoints": [10673], "characters": "\u29B1" }, + "⥿": { "codepoints": [10623], "characters": "\u297F" }, + "𝔇": { "codepoints": [120071], "characters": "\uD835\uDD07" }, + "𝔡": { "codepoints": [120097], "characters": "\uD835\uDD21" }, + "⥥": { "codepoints": [10597], "characters": "\u2965" }, + "⇃": { "codepoints": [8643], "characters": "\u21C3" }, + "⇂": { "codepoints": [8642], "characters": "\u21C2" }, + "´": { "codepoints": [180], "characters": "\u00B4" }, + "˙": { "codepoints": [729], "characters": "\u02D9" }, + "˝": { "codepoints": [733], "characters": "\u02DD" }, + "`": { "codepoints": [96], "characters": "\u0060" }, + "˜": { "codepoints": [732], "characters": "\u02DC" }, + "⋄": { "codepoints": [8900], "characters": "\u22C4" }, + "⋄": { "codepoints": [8900], "characters": "\u22C4" }, + "⋄": { "codepoints": [8900], "characters": "\u22C4" }, + "♦": { "codepoints": [9830], "characters": "\u2666" }, + "♦": { "codepoints": [9830], "characters": "\u2666" }, + "¨": { "codepoints": [168], "characters": "\u00A8" }, + "ⅆ": { "codepoints": [8518], "characters": "\u2146" }, + "ϝ": { "codepoints": [989], "characters": "\u03DD" }, + "⋲": { "codepoints": [8946], "characters": "\u22F2" }, + "÷": { "codepoints": [247], "characters": "\u00F7" }, + "÷": { "codepoints": [247], "characters": "\u00F7" }, + "÷": { "codepoints": [247], "characters": "\u00F7" }, + "⋇": { "codepoints": [8903], "characters": "\u22C7" }, + "⋇": { "codepoints": [8903], "characters": "\u22C7" }, + "Ђ": { "codepoints": [1026], "characters": "\u0402" }, + "ђ": { "codepoints": [1106], "characters": "\u0452" }, + "⌞": { "codepoints": [8990], "characters": "\u231E" }, + "⌍": { "codepoints": [8973], "characters": "\u230D" }, + "$": { "codepoints": [36], "characters": "\u0024" }, + "𝔻": { "codepoints": [120123], "characters": "\uD835\uDD3B" }, + "𝕕": { "codepoints": [120149], "characters": "\uD835\uDD55" }, + "¨": { "codepoints": [168], "characters": "\u00A8" }, + "˙": { "codepoints": [729], "characters": "\u02D9" }, + "⃜": { "codepoints": [8412], "characters": "\u20DC" }, + "≐": { "codepoints": [8784], "characters": "\u2250" }, + "≑": { "codepoints": [8785], "characters": "\u2251" }, + "≐": { "codepoints": [8784], "characters": "\u2250" }, + "∸": { "codepoints": [8760], "characters": "\u2238" }, + 
"∔": { "codepoints": [8724], "characters": "\u2214" }, + "⊡": { "codepoints": [8865], "characters": "\u22A1" }, + "⌆": { "codepoints": [8966], "characters": "\u2306" }, + "∯": { "codepoints": [8751], "characters": "\u222F" }, + "¨": { "codepoints": [168], "characters": "\u00A8" }, + "⇓": { "codepoints": [8659], "characters": "\u21D3" }, + "⇐": { "codepoints": [8656], "characters": "\u21D0" }, + "⇔": { "codepoints": [8660], "characters": "\u21D4" }, + "⫤": { "codepoints": [10980], "characters": "\u2AE4" }, + "⟸": { "codepoints": [10232], "characters": "\u27F8" }, + "⟺": { "codepoints": [10234], "characters": "\u27FA" }, + "⟹": { "codepoints": [10233], "characters": "\u27F9" }, + "⇒": { "codepoints": [8658], "characters": "\u21D2" }, + "⊨": { "codepoints": [8872], "characters": "\u22A8" }, + "⇑": { "codepoints": [8657], "characters": "\u21D1" }, + "⇕": { "codepoints": [8661], "characters": "\u21D5" }, + "∥": { "codepoints": [8741], "characters": "\u2225" }, + "⤓": { "codepoints": [10515], "characters": "\u2913" }, + "↓": { "codepoints": [8595], "characters": "\u2193" }, + "↓": { "codepoints": [8595], "characters": "\u2193" }, + "⇓": { "codepoints": [8659], "characters": "\u21D3" }, + "⇵": { "codepoints": [8693], "characters": "\u21F5" }, + "̑": { "codepoints": [785], "characters": "\u0311" }, + "⇊": { "codepoints": [8650], "characters": "\u21CA" }, + "⇃": { "codepoints": [8643], "characters": "\u21C3" }, + "⇂": { "codepoints": [8642], "characters": "\u21C2" }, + "⥐": { "codepoints": [10576], "characters": "\u2950" }, + "⥞": { "codepoints": [10590], "characters": "\u295E" }, + "⥖": { "codepoints": [10582], "characters": "\u2956" }, + "↽": { "codepoints": [8637], "characters": "\u21BD" }, + "⥟": { "codepoints": [10591], "characters": "\u295F" }, + "⥗": { "codepoints": [10583], "characters": "\u2957" }, + "⇁": { "codepoints": [8641], "characters": "\u21C1" }, + "↧": { "codepoints": [8615], "characters": "\u21A7" }, + "⊤": { "codepoints": [8868], "characters": "\u22A4" }, + "⤐": { "codepoints": [10512], "characters": "\u2910" }, + "⌟": { "codepoints": [8991], "characters": "\u231F" }, + "⌌": { "codepoints": [8972], "characters": "\u230C" }, + "𝒟": { "codepoints": [119967], "characters": "\uD835\uDC9F" }, + "𝒹": { "codepoints": [119993], "characters": "\uD835\uDCB9" }, + "Ѕ": { "codepoints": [1029], "characters": "\u0405" }, + "ѕ": { "codepoints": [1109], "characters": "\u0455" }, + "⧶": { "codepoints": [10742], "characters": "\u29F6" }, + "Đ": { "codepoints": [272], "characters": "\u0110" }, + "đ": { "codepoints": [273], "characters": "\u0111" }, + "⋱": { "codepoints": [8945], "characters": "\u22F1" }, + "▿": { "codepoints": [9663], "characters": "\u25BF" }, + "▾": { "codepoints": [9662], "characters": "\u25BE" }, + "⇵": { "codepoints": [8693], "characters": "\u21F5" }, + "⥯": { "codepoints": [10607], "characters": "\u296F" }, + "⦦": { "codepoints": [10662], "characters": "\u29A6" }, + "Џ": { "codepoints": [1039], "characters": "\u040F" }, + "џ": { "codepoints": [1119], "characters": "\u045F" }, + "⟿": { "codepoints": [10239], "characters": "\u27FF" }, + "É": { "codepoints": [201], "characters": "\u00C9" }, + "É": { "codepoints": [201], "characters": "\u00C9" }, + "é": { "codepoints": [233], "characters": "\u00E9" }, + "é": { "codepoints": [233], "characters": "\u00E9" }, + "⩮": { "codepoints": [10862], "characters": "\u2A6E" }, + "Ě": { "codepoints": [282], "characters": "\u011A" }, + "ě": { "codepoints": [283], "characters": "\u011B" }, + "Ê": { "codepoints": [202], "characters": "\u00CA" }, + 
"Ê": { "codepoints": [202], "characters": "\u00CA" }, + "ê": { "codepoints": [234], "characters": "\u00EA" }, + "ê": { "codepoints": [234], "characters": "\u00EA" }, + "≖": { "codepoints": [8790], "characters": "\u2256" }, + "≕": { "codepoints": [8789], "characters": "\u2255" }, + "Э": { "codepoints": [1069], "characters": "\u042D" }, + "э": { "codepoints": [1101], "characters": "\u044D" }, + "⩷": { "codepoints": [10871], "characters": "\u2A77" }, + "Ė": { "codepoints": [278], "characters": "\u0116" }, + "ė": { "codepoints": [279], "characters": "\u0117" }, + "≑": { "codepoints": [8785], "characters": "\u2251" }, + "ⅇ": { "codepoints": [8519], "characters": "\u2147" }, + "≒": { "codepoints": [8786], "characters": "\u2252" }, + "𝔈": { "codepoints": [120072], "characters": "\uD835\uDD08" }, + "𝔢": { "codepoints": [120098], "characters": "\uD835\uDD22" }, + "⪚": { "codepoints": [10906], "characters": "\u2A9A" }, + "È": { "codepoints": [200], "characters": "\u00C8" }, + "È": { "codepoints": [200], "characters": "\u00C8" }, + "è": { "codepoints": [232], "characters": "\u00E8" }, + "è": { "codepoints": [232], "characters": "\u00E8" }, + "⪖": { "codepoints": [10902], "characters": "\u2A96" }, + "⪘": { "codepoints": [10904], "characters": "\u2A98" }, + "⪙": { "codepoints": [10905], "characters": "\u2A99" }, + "∈": { "codepoints": [8712], "characters": "\u2208" }, + "⏧": { "codepoints": [9191], "characters": "\u23E7" }, + "ℓ": { "codepoints": [8467], "characters": "\u2113" }, + "⪕": { "codepoints": [10901], "characters": "\u2A95" }, + "⪗": { "codepoints": [10903], "characters": "\u2A97" }, + "Ē": { "codepoints": [274], "characters": "\u0112" }, + "ē": { "codepoints": [275], "characters": "\u0113" }, + "∅": { "codepoints": [8709], "characters": "\u2205" }, + "∅": { "codepoints": [8709], "characters": "\u2205" }, + "◻": { "codepoints": [9723], "characters": "\u25FB" }, + "∅": { "codepoints": [8709], "characters": "\u2205" }, + "▫": { "codepoints": [9643], "characters": "\u25AB" }, + " ": { "codepoints": [8196], "characters": "\u2004" }, + " ": { "codepoints": [8197], "characters": "\u2005" }, + " ": { "codepoints": [8195], "characters": "\u2003" }, + "Ŋ": { "codepoints": [330], "characters": "\u014A" }, + "ŋ": { "codepoints": [331], "characters": "\u014B" }, + " ": { "codepoints": [8194], "characters": "\u2002" }, + "Ę": { "codepoints": [280], "characters": "\u0118" }, + "ę": { "codepoints": [281], "characters": "\u0119" }, + "𝔼": { "codepoints": [120124], "characters": "\uD835\uDD3C" }, + "𝕖": { "codepoints": [120150], "characters": "\uD835\uDD56" }, + "⋕": { "codepoints": [8917], "characters": "\u22D5" }, + "⧣": { "codepoints": [10723], "characters": "\u29E3" }, + "⩱": { "codepoints": [10865], "characters": "\u2A71" }, + "ε": { "codepoints": [949], "characters": "\u03B5" }, + "Ε": { "codepoints": [917], "characters": "\u0395" }, + "ε": { "codepoints": [949], "characters": "\u03B5" }, + "ϵ": { "codepoints": [1013], "characters": "\u03F5" }, + "≖": { "codepoints": [8790], "characters": "\u2256" }, + "≕": { "codepoints": [8789], "characters": "\u2255" }, + "≂": { "codepoints": [8770], "characters": "\u2242" }, + "⪖": { "codepoints": [10902], "characters": "\u2A96" }, + "⪕": { "codepoints": [10901], "characters": "\u2A95" }, + "⩵": { "codepoints": [10869], "characters": "\u2A75" }, + "=": { "codepoints": [61], "characters": "\u003D" }, + "≂": { "codepoints": [8770], "characters": "\u2242" }, + "≟": { "codepoints": [8799], "characters": "\u225F" }, + "⇌": { "codepoints": [8652], "characters": "\u21CC" 
}, + "≡": { "codepoints": [8801], "characters": "\u2261" }, + "⩸": { "codepoints": [10872], "characters": "\u2A78" }, + "⧥": { "codepoints": [10725], "characters": "\u29E5" }, + "⥱": { "codepoints": [10609], "characters": "\u2971" }, + "≓": { "codepoints": [8787], "characters": "\u2253" }, + "ℯ": { "codepoints": [8495], "characters": "\u212F" }, + "ℰ": { "codepoints": [8496], "characters": "\u2130" }, + "≐": { "codepoints": [8784], "characters": "\u2250" }, + "⩳": { "codepoints": [10867], "characters": "\u2A73" }, + "≂": { "codepoints": [8770], "characters": "\u2242" }, + "Η": { "codepoints": [919], "characters": "\u0397" }, + "η": { "codepoints": [951], "characters": "\u03B7" }, + "Ð": { "codepoints": [208], "characters": "\u00D0" }, + "Ð": { "codepoints": [208], "characters": "\u00D0" }, + "ð": { "codepoints": [240], "characters": "\u00F0" }, + "ð": { "codepoints": [240], "characters": "\u00F0" }, + "Ë": { "codepoints": [203], "characters": "\u00CB" }, + "Ë": { "codepoints": [203], "characters": "\u00CB" }, + "ë": { "codepoints": [235], "characters": "\u00EB" }, + "ë": { "codepoints": [235], "characters": "\u00EB" }, + "€": { "codepoints": [8364], "characters": "\u20AC" }, + "!": { "codepoints": [33], "characters": "\u0021" }, + "∃": { "codepoints": [8707], "characters": "\u2203" }, + "∃": { "codepoints": [8707], "characters": "\u2203" }, + "ℰ": { "codepoints": [8496], "characters": "\u2130" }, + "ⅇ": { "codepoints": [8519], "characters": "\u2147" }, + "ⅇ": { "codepoints": [8519], "characters": "\u2147" }, + "≒": { "codepoints": [8786], "characters": "\u2252" }, + "Ф": { "codepoints": [1060], "characters": "\u0424" }, + "ф": { "codepoints": [1092], "characters": "\u0444" }, + "♀": { "codepoints": [9792], "characters": "\u2640" }, + "ffi": { "codepoints": [64259], "characters": "\uFB03" }, + "ff": { "codepoints": [64256], "characters": "\uFB00" }, + "ffl": { "codepoints": [64260], "characters": "\uFB04" }, + "𝔉": { "codepoints": [120073], "characters": "\uD835\uDD09" }, + "𝔣": { "codepoints": [120099], "characters": "\uD835\uDD23" }, + "fi": { "codepoints": [64257], "characters": "\uFB01" }, + "◼": { "codepoints": [9724], "characters": "\u25FC" }, + "▪": { "codepoints": [9642], "characters": "\u25AA" }, + "fj": { "codepoints": [102, 106], "characters": "\u0066\u006A" }, + "♭": { "codepoints": [9837], "characters": "\u266D" }, + "fl": { "codepoints": [64258], "characters": "\uFB02" }, + "▱": { "codepoints": [9649], "characters": "\u25B1" }, + "ƒ": { "codepoints": [402], "characters": "\u0192" }, + "𝔽": { "codepoints": [120125], "characters": "\uD835\uDD3D" }, + "𝕗": { "codepoints": [120151], "characters": "\uD835\uDD57" }, + "∀": { "codepoints": [8704], "characters": "\u2200" }, + "∀": { "codepoints": [8704], "characters": "\u2200" }, + "⋔": { "codepoints": [8916], "characters": "\u22D4" }, + "⫙": { "codepoints": [10969], "characters": "\u2AD9" }, + "ℱ": { "codepoints": [8497], "characters": "\u2131" }, + "⨍": { "codepoints": [10765], "characters": "\u2A0D" }, + "½": { "codepoints": [189], "characters": "\u00BD" }, + "½": { "codepoints": [189], "characters": "\u00BD" }, + "⅓": { "codepoints": [8531], "characters": "\u2153" }, + "¼": { "codepoints": [188], "characters": "\u00BC" }, + "¼": { "codepoints": [188], "characters": "\u00BC" }, + "⅕": { "codepoints": [8533], "characters": "\u2155" }, + "⅙": { "codepoints": [8537], "characters": "\u2159" }, + "⅛": { "codepoints": [8539], "characters": "\u215B" }, + "⅔": { "codepoints": [8532], "characters": "\u2154" }, + "⅖": { "codepoints": [8534], 
"characters": "\u2156" }, + "¾": { "codepoints": [190], "characters": "\u00BE" }, + "¾": { "codepoints": [190], "characters": "\u00BE" }, + "⅗": { "codepoints": [8535], "characters": "\u2157" }, + "⅜": { "codepoints": [8540], "characters": "\u215C" }, + "⅘": { "codepoints": [8536], "characters": "\u2158" }, + "⅚": { "codepoints": [8538], "characters": "\u215A" }, + "⅝": { "codepoints": [8541], "characters": "\u215D" }, + "⅞": { "codepoints": [8542], "characters": "\u215E" }, + "⁄": { "codepoints": [8260], "characters": "\u2044" }, + "⌢": { "codepoints": [8994], "characters": "\u2322" }, + "𝒻": { "codepoints": [119995], "characters": "\uD835\uDCBB" }, + "ℱ": { "codepoints": [8497], "characters": "\u2131" }, + "ǵ": { "codepoints": [501], "characters": "\u01F5" }, + "Γ": { "codepoints": [915], "characters": "\u0393" }, + "γ": { "codepoints": [947], "characters": "\u03B3" }, + "Ϝ": { "codepoints": [988], "characters": "\u03DC" }, + "ϝ": { "codepoints": [989], "characters": "\u03DD" }, + "⪆": { "codepoints": [10886], "characters": "\u2A86" }, + "Ğ": { "codepoints": [286], "characters": "\u011E" }, + "ğ": { "codepoints": [287], "characters": "\u011F" }, + "Ģ": { "codepoints": [290], "characters": "\u0122" }, + "Ĝ": { "codepoints": [284], "characters": "\u011C" }, + "ĝ": { "codepoints": [285], "characters": "\u011D" }, + "Г": { "codepoints": [1043], "characters": "\u0413" }, + "г": { "codepoints": [1075], "characters": "\u0433" }, + "Ġ": { "codepoints": [288], "characters": "\u0120" }, + "ġ": { "codepoints": [289], "characters": "\u0121" }, + "≥": { "codepoints": [8805], "characters": "\u2265" }, + "≧": { "codepoints": [8807], "characters": "\u2267" }, + "⪌": { "codepoints": [10892], "characters": "\u2A8C" }, + "⋛": { "codepoints": [8923], "characters": "\u22DB" }, + "≥": { "codepoints": [8805], "characters": "\u2265" }, + "≧": { "codepoints": [8807], "characters": "\u2267" }, + "⩾": { "codepoints": [10878], "characters": "\u2A7E" }, + "⪩": { "codepoints": [10921], "characters": "\u2AA9" }, + "⩾": { "codepoints": [10878], "characters": "\u2A7E" }, + "⪀": { "codepoints": [10880], "characters": "\u2A80" }, + "⪂": { "codepoints": [10882], "characters": "\u2A82" }, + "⪄": { "codepoints": [10884], "characters": "\u2A84" }, + "⋛︀": { "codepoints": [8923, 65024], "characters": "\u22DB\uFE00" }, + "⪔": { "codepoints": [10900], "characters": "\u2A94" }, + "𝔊": { "codepoints": [120074], "characters": "\uD835\uDD0A" }, + "𝔤": { "codepoints": [120100], "characters": "\uD835\uDD24" }, + "≫": { "codepoints": [8811], "characters": "\u226B" }, + "⋙": { "codepoints": [8921], "characters": "\u22D9" }, + "⋙": { "codepoints": [8921], "characters": "\u22D9" }, + "ℷ": { "codepoints": [8503], "characters": "\u2137" }, + "Ѓ": { "codepoints": [1027], "characters": "\u0403" }, + "ѓ": { "codepoints": [1107], "characters": "\u0453" }, + "⪥": { "codepoints": [10917], "characters": "\u2AA5" }, + "≷": { "codepoints": [8823], "characters": "\u2277" }, + "⪒": { "codepoints": [10898], "characters": "\u2A92" }, + "⪤": { "codepoints": [10916], "characters": "\u2AA4" }, + "⪊": { "codepoints": [10890], "characters": "\u2A8A" }, + "⪊": { "codepoints": [10890], "characters": "\u2A8A" }, + "⪈": { "codepoints": [10888], "characters": "\u2A88" }, + "≩": { "codepoints": [8809], "characters": "\u2269" }, + "⪈": { "codepoints": [10888], "characters": "\u2A88" }, + "≩": { "codepoints": [8809], "characters": "\u2269" }, + "⋧": { "codepoints": [8935], "characters": "\u22E7" }, + "𝔾": { "codepoints": [120126], "characters": "\uD835\uDD3E" }, + 
"𝕘": { "codepoints": [120152], "characters": "\uD835\uDD58" }, + "`": { "codepoints": [96], "characters": "\u0060" }, + "≥": { "codepoints": [8805], "characters": "\u2265" }, + "⋛": { "codepoints": [8923], "characters": "\u22DB" }, + "≧": { "codepoints": [8807], "characters": "\u2267" }, + "⪢": { "codepoints": [10914], "characters": "\u2AA2" }, + "≷": { "codepoints": [8823], "characters": "\u2277" }, + "⩾": { "codepoints": [10878], "characters": "\u2A7E" }, + "≳": { "codepoints": [8819], "characters": "\u2273" }, + "𝒢": { "codepoints": [119970], "characters": "\uD835\uDCA2" }, + "ℊ": { "codepoints": [8458], "characters": "\u210A" }, + "≳": { "codepoints": [8819], "characters": "\u2273" }, + "⪎": { "codepoints": [10894], "characters": "\u2A8E" }, + "⪐": { "codepoints": [10896], "characters": "\u2A90" }, + "⪧": { "codepoints": [10919], "characters": "\u2AA7" }, + "⩺": { "codepoints": [10874], "characters": "\u2A7A" }, + ">": { "codepoints": [62], "characters": "\u003E" }, + ">": { "codepoints": [62], "characters": "\u003E" }, + ">": { "codepoints": [62], "characters": "\u003E" }, + ">": { "codepoints": [62], "characters": "\u003E" }, + "≫": { "codepoints": [8811], "characters": "\u226B" }, + "⋗": { "codepoints": [8919], "characters": "\u22D7" }, + "⦕": { "codepoints": [10645], "characters": "\u2995" }, + "⩼": { "codepoints": [10876], "characters": "\u2A7C" }, + "⪆": { "codepoints": [10886], "characters": "\u2A86" }, + "⥸": { "codepoints": [10616], "characters": "\u2978" }, + "⋗": { "codepoints": [8919], "characters": "\u22D7" }, + "⋛": { "codepoints": [8923], "characters": "\u22DB" }, + "⪌": { "codepoints": [10892], "characters": "\u2A8C" }, + "≷": { "codepoints": [8823], "characters": "\u2277" }, + "≳": { "codepoints": [8819], "characters": "\u2273" }, + "≩︀": { "codepoints": [8809, 65024], "characters": "\u2269\uFE00" }, + "≩︀": { "codepoints": [8809, 65024], "characters": "\u2269\uFE00" }, + "ˇ": { "codepoints": [711], "characters": "\u02C7" }, + " ": { "codepoints": [8202], "characters": "\u200A" }, + "½": { "codepoints": [189], "characters": "\u00BD" }, + "ℋ": { "codepoints": [8459], "characters": "\u210B" }, + "Ъ": { "codepoints": [1066], "characters": "\u042A" }, + "ъ": { "codepoints": [1098], "characters": "\u044A" }, + "⥈": { "codepoints": [10568], "characters": "\u2948" }, + "↔": { "codepoints": [8596], "characters": "\u2194" }, + "⇔": { "codepoints": [8660], "characters": "\u21D4" }, + "↭": { "codepoints": [8621], "characters": "\u21AD" }, + "^": { "codepoints": [94], "characters": "\u005E" }, + "ℏ": { "codepoints": [8463], "characters": "\u210F" }, + "Ĥ": { "codepoints": [292], "characters": "\u0124" }, + "ĥ": { "codepoints": [293], "characters": "\u0125" }, + "♥": { "codepoints": [9829], "characters": "\u2665" }, + "♥": { "codepoints": [9829], "characters": "\u2665" }, + "…": { "codepoints": [8230], "characters": "\u2026" }, + "⊹": { "codepoints": [8889], "characters": "\u22B9" }, + "𝔥": { "codepoints": [120101], "characters": "\uD835\uDD25" }, + "ℌ": { "codepoints": [8460], "characters": "\u210C" }, + "ℋ": { "codepoints": [8459], "characters": "\u210B" }, + "⤥": { "codepoints": [10533], "characters": "\u2925" }, + "⤦": { "codepoints": [10534], "characters": "\u2926" }, + "⇿": { "codepoints": [8703], "characters": "\u21FF" }, + "∻": { "codepoints": [8763], "characters": "\u223B" }, + "↩": { "codepoints": [8617], "characters": "\u21A9" }, + "↪": { "codepoints": [8618], "characters": "\u21AA" }, + "𝕙": { "codepoints": [120153], "characters": "\uD835\uDD59" }, + "ℍ": { 
"codepoints": [8461], "characters": "\u210D" }, + "―": { "codepoints": [8213], "characters": "\u2015" }, + "─": { "codepoints": [9472], "characters": "\u2500" }, + "𝒽": { "codepoints": [119997], "characters": "\uD835\uDCBD" }, + "ℋ": { "codepoints": [8459], "characters": "\u210B" }, + "ℏ": { "codepoints": [8463], "characters": "\u210F" }, + "Ħ": { "codepoints": [294], "characters": "\u0126" }, + "ħ": { "codepoints": [295], "characters": "\u0127" }, + "≎": { "codepoints": [8782], "characters": "\u224E" }, + "≏": { "codepoints": [8783], "characters": "\u224F" }, + "⁃": { "codepoints": [8259], "characters": "\u2043" }, + "‐": { "codepoints": [8208], "characters": "\u2010" }, + "Í": { "codepoints": [205], "characters": "\u00CD" }, + "Í": { "codepoints": [205], "characters": "\u00CD" }, + "í": { "codepoints": [237], "characters": "\u00ED" }, + "í": { "codepoints": [237], "characters": "\u00ED" }, + "⁣": { "codepoints": [8291], "characters": "\u2063" }, + "Î": { "codepoints": [206], "characters": "\u00CE" }, + "Î": { "codepoints": [206], "characters": "\u00CE" }, + "î": { "codepoints": [238], "characters": "\u00EE" }, + "î": { "codepoints": [238], "characters": "\u00EE" }, + "И": { "codepoints": [1048], "characters": "\u0418" }, + "и": { "codepoints": [1080], "characters": "\u0438" }, + "İ": { "codepoints": [304], "characters": "\u0130" }, + "Е": { "codepoints": [1045], "characters": "\u0415" }, + "е": { "codepoints": [1077], "characters": "\u0435" }, + "¡": { "codepoints": [161], "characters": "\u00A1" }, + "¡": { "codepoints": [161], "characters": "\u00A1" }, + "⇔": { "codepoints": [8660], "characters": "\u21D4" }, + "𝔦": { "codepoints": [120102], "characters": "\uD835\uDD26" }, + "ℑ": { "codepoints": [8465], "characters": "\u2111" }, + "Ì": { "codepoints": [204], "characters": "\u00CC" }, + "Ì": { "codepoints": [204], "characters": "\u00CC" }, + "ì": { "codepoints": [236], "characters": "\u00EC" }, + "ì": { "codepoints": [236], "characters": "\u00EC" }, + "ⅈ": { "codepoints": [8520], "characters": "\u2148" }, + "⨌": { "codepoints": [10764], "characters": "\u2A0C" }, + "∭": { "codepoints": [8749], "characters": "\u222D" }, + "⧜": { "codepoints": [10716], "characters": "\u29DC" }, + "℩": { "codepoints": [8489], "characters": "\u2129" }, + "IJ": { "codepoints": [306], "characters": "\u0132" }, + "ij": { "codepoints": [307], "characters": "\u0133" }, + "Ī": { "codepoints": [298], "characters": "\u012A" }, + "ī": { "codepoints": [299], "characters": "\u012B" }, + "ℑ": { "codepoints": [8465], "characters": "\u2111" }, + "ⅈ": { "codepoints": [8520], "characters": "\u2148" }, + "ℐ": { "codepoints": [8464], "characters": "\u2110" }, + "ℑ": { "codepoints": [8465], "characters": "\u2111" }, + "ı": { "codepoints": [305], "characters": "\u0131" }, + "ℑ": { "codepoints": [8465], "characters": "\u2111" }, + "⊷": { "codepoints": [8887], "characters": "\u22B7" }, + "Ƶ": { "codepoints": [437], "characters": "\u01B5" }, + "⇒": { "codepoints": [8658], "characters": "\u21D2" }, + "℅": { "codepoints": [8453], "characters": "\u2105" }, + "∈": { "codepoints": [8712], "characters": "\u2208" }, + "∞": { "codepoints": [8734], "characters": "\u221E" }, + "⧝": { "codepoints": [10717], "characters": "\u29DD" }, + "ı": { "codepoints": [305], "characters": "\u0131" }, + "⊺": { "codepoints": [8890], "characters": "\u22BA" }, + "∫": { "codepoints": [8747], "characters": "\u222B" }, + "∬": { "codepoints": [8748], "characters": "\u222C" }, + "ℤ": { "codepoints": [8484], "characters": "\u2124" }, + "∫": { "codepoints": [8747], 
"characters": "\u222B" }, + "⊺": { "codepoints": [8890], "characters": "\u22BA" }, + "⋂": { "codepoints": [8898], "characters": "\u22C2" }, + "⨗": { "codepoints": [10775], "characters": "\u2A17" }, + "⨼": { "codepoints": [10812], "characters": "\u2A3C" }, + "⁣": { "codepoints": [8291], "characters": "\u2063" }, + "⁢": { "codepoints": [8290], "characters": "\u2062" }, + "Ё": { "codepoints": [1025], "characters": "\u0401" }, + "ё": { "codepoints": [1105], "characters": "\u0451" }, + "Į": { "codepoints": [302], "characters": "\u012E" }, + "į": { "codepoints": [303], "characters": "\u012F" }, + "𝕀": { "codepoints": [120128], "characters": "\uD835\uDD40" }, + "𝕚": { "codepoints": [120154], "characters": "\uD835\uDD5A" }, + "Ι": { "codepoints": [921], "characters": "\u0399" }, + "ι": { "codepoints": [953], "characters": "\u03B9" }, + "⨼": { "codepoints": [10812], "characters": "\u2A3C" }, + "¿": { "codepoints": [191], "characters": "\u00BF" }, + "¿": { "codepoints": [191], "characters": "\u00BF" }, + "𝒾": { "codepoints": [119998], "characters": "\uD835\uDCBE" }, + "ℐ": { "codepoints": [8464], "characters": "\u2110" }, + "∈": { "codepoints": [8712], "characters": "\u2208" }, + "⋵": { "codepoints": [8949], "characters": "\u22F5" }, + "⋹": { "codepoints": [8953], "characters": "\u22F9" }, + "⋴": { "codepoints": [8948], "characters": "\u22F4" }, + "⋳": { "codepoints": [8947], "characters": "\u22F3" }, + "∈": { "codepoints": [8712], "characters": "\u2208" }, + "⁢": { "codepoints": [8290], "characters": "\u2062" }, + "Ĩ": { "codepoints": [296], "characters": "\u0128" }, + "ĩ": { "codepoints": [297], "characters": "\u0129" }, + "І": { "codepoints": [1030], "characters": "\u0406" }, + "і": { "codepoints": [1110], "characters": "\u0456" }, + "Ï": { "codepoints": [207], "characters": "\u00CF" }, + "Ï": { "codepoints": [207], "characters": "\u00CF" }, + "ï": { "codepoints": [239], "characters": "\u00EF" }, + "ï": { "codepoints": [239], "characters": "\u00EF" }, + "Ĵ": { "codepoints": [308], "characters": "\u0134" }, + "ĵ": { "codepoints": [309], "characters": "\u0135" }, + "Й": { "codepoints": [1049], "characters": "\u0419" }, + "й": { "codepoints": [1081], "characters": "\u0439" }, + "𝔍": { "codepoints": [120077], "characters": "\uD835\uDD0D" }, + "𝔧": { "codepoints": [120103], "characters": "\uD835\uDD27" }, + "ȷ": { "codepoints": [567], "characters": "\u0237" }, + "𝕁": { "codepoints": [120129], "characters": "\uD835\uDD41" }, + "𝕛": { "codepoints": [120155], "characters": "\uD835\uDD5B" }, + "𝒥": { "codepoints": [119973], "characters": "\uD835\uDCA5" }, + "𝒿": { "codepoints": [119999], "characters": "\uD835\uDCBF" }, + "Ј": { "codepoints": [1032], "characters": "\u0408" }, + "ј": { "codepoints": [1112], "characters": "\u0458" }, + "Є": { "codepoints": [1028], "characters": "\u0404" }, + "є": { "codepoints": [1108], "characters": "\u0454" }, + "Κ": { "codepoints": [922], "characters": "\u039A" }, + "κ": { "codepoints": [954], "characters": "\u03BA" }, + "ϰ": { "codepoints": [1008], "characters": "\u03F0" }, + "Ķ": { "codepoints": [310], "characters": "\u0136" }, + "ķ": { "codepoints": [311], "characters": "\u0137" }, + "К": { "codepoints": [1050], "characters": "\u041A" }, + "к": { "codepoints": [1082], "characters": "\u043A" }, + "𝔎": { "codepoints": [120078], "characters": "\uD835\uDD0E" }, + "𝔨": { "codepoints": [120104], "characters": "\uD835\uDD28" }, + "ĸ": { "codepoints": [312], "characters": "\u0138" }, + "Х": { "codepoints": [1061], "characters": "\u0425" }, + "х": { "codepoints": [1093], 
"characters": "\u0445" }, + "Ќ": { "codepoints": [1036], "characters": "\u040C" }, + "ќ": { "codepoints": [1116], "characters": "\u045C" }, + "𝕂": { "codepoints": [120130], "characters": "\uD835\uDD42" }, + "𝕜": { "codepoints": [120156], "characters": "\uD835\uDD5C" }, + "𝒦": { "codepoints": [119974], "characters": "\uD835\uDCA6" }, + "𝓀": { "codepoints": [120000], "characters": "\uD835\uDCC0" }, + "⇚": { "codepoints": [8666], "characters": "\u21DA" }, + "Ĺ": { "codepoints": [313], "characters": "\u0139" }, + "ĺ": { "codepoints": [314], "characters": "\u013A" }, + "⦴": { "codepoints": [10676], "characters": "\u29B4" }, + "ℒ": { "codepoints": [8466], "characters": "\u2112" }, + "Λ": { "codepoints": [923], "characters": "\u039B" }, + "λ": { "codepoints": [955], "characters": "\u03BB" }, + "⟨": { "codepoints": [10216], "characters": "\u27E8" }, + "⟪": { "codepoints": [10218], "characters": "\u27EA" }, + "⦑": { "codepoints": [10641], "characters": "\u2991" }, + "⟨": { "codepoints": [10216], "characters": "\u27E8" }, + "⪅": { "codepoints": [10885], "characters": "\u2A85" }, + "ℒ": { "codepoints": [8466], "characters": "\u2112" }, + "«": { "codepoints": [171], "characters": "\u00AB" }, + "«": { "codepoints": [171], "characters": "\u00AB" }, + "⇤": { "codepoints": [8676], "characters": "\u21E4" }, + "⤟": { "codepoints": [10527], "characters": "\u291F" }, + "←": { "codepoints": [8592], "characters": "\u2190" }, + "↞": { "codepoints": [8606], "characters": "\u219E" }, + "⇐": { "codepoints": [8656], "characters": "\u21D0" }, + "⤝": { "codepoints": [10525], "characters": "\u291D" }, + "↩": { "codepoints": [8617], "characters": "\u21A9" }, + "↫": { "codepoints": [8619], "characters": "\u21AB" }, + "⤹": { "codepoints": [10553], "characters": "\u2939" }, + "⥳": { "codepoints": [10611], "characters": "\u2973" }, + "↢": { "codepoints": [8610], "characters": "\u21A2" }, + "⤙": { "codepoints": [10521], "characters": "\u2919" }, + "⤛": { "codepoints": [10523], "characters": "\u291B" }, + "⪫": { "codepoints": [10923], "characters": "\u2AAB" }, + "⪭": { "codepoints": [10925], "characters": "\u2AAD" }, + "⪭︀": { "codepoints": [10925, 65024], "characters": "\u2AAD\uFE00" }, + "⤌": { "codepoints": [10508], "characters": "\u290C" }, + "⤎": { "codepoints": [10510], "characters": "\u290E" }, + "❲": { "codepoints": [10098], "characters": "\u2772" }, + "{": { "codepoints": [123], "characters": "\u007B" }, + "[": { "codepoints": [91], "characters": "\u005B" }, + "⦋": { "codepoints": [10635], "characters": "\u298B" }, + "⦏": { "codepoints": [10639], "characters": "\u298F" }, + "⦍": { "codepoints": [10637], "characters": "\u298D" }, + "Ľ": { "codepoints": [317], "characters": "\u013D" }, + "ľ": { "codepoints": [318], "characters": "\u013E" }, + "Ļ": { "codepoints": [315], "characters": "\u013B" }, + "ļ": { "codepoints": [316], "characters": "\u013C" }, + "⌈": { "codepoints": [8968], "characters": "\u2308" }, + "{": { "codepoints": [123], "characters": "\u007B" }, + "Л": { "codepoints": [1051], "characters": "\u041B" }, + "л": { "codepoints": [1083], "characters": "\u043B" }, + "⤶": { "codepoints": [10550], "characters": "\u2936" }, + "“": { "codepoints": [8220], "characters": "\u201C" }, + "„": { "codepoints": [8222], "characters": "\u201E" }, + "⥧": { "codepoints": [10599], "characters": "\u2967" }, + "⥋": { "codepoints": [10571], "characters": "\u294B" }, + "↲": { "codepoints": [8626], "characters": "\u21B2" }, + "≤": { "codepoints": [8804], "characters": "\u2264" }, + "≦": { "codepoints": [8806], "characters": 
"\u2266" }, + "⟨": { "codepoints": [10216], "characters": "\u27E8" }, + "⇤": { "codepoints": [8676], "characters": "\u21E4" }, + "←": { "codepoints": [8592], "characters": "\u2190" }, + "←": { "codepoints": [8592], "characters": "\u2190" }, + "⇐": { "codepoints": [8656], "characters": "\u21D0" }, + "⇆": { "codepoints": [8646], "characters": "\u21C6" }, + "↢": { "codepoints": [8610], "characters": "\u21A2" }, + "⌈": { "codepoints": [8968], "characters": "\u2308" }, + "⟦": { "codepoints": [10214], "characters": "\u27E6" }, + "⥡": { "codepoints": [10593], "characters": "\u2961" }, + "⥙": { "codepoints": [10585], "characters": "\u2959" }, + "⇃": { "codepoints": [8643], "characters": "\u21C3" }, + "⌊": { "codepoints": [8970], "characters": "\u230A" }, + "↽": { "codepoints": [8637], "characters": "\u21BD" }, + "↼": { "codepoints": [8636], "characters": "\u21BC" }, + "⇇": { "codepoints": [8647], "characters": "\u21C7" }, + "↔": { "codepoints": [8596], "characters": "\u2194" }, + "↔": { "codepoints": [8596], "characters": "\u2194" }, + "⇔": { "codepoints": [8660], "characters": "\u21D4" }, + "⇆": { "codepoints": [8646], "characters": "\u21C6" }, + "⇋": { "codepoints": [8651], "characters": "\u21CB" }, + "↭": { "codepoints": [8621], "characters": "\u21AD" }, + "⥎": { "codepoints": [10574], "characters": "\u294E" }, + "↤": { "codepoints": [8612], "characters": "\u21A4" }, + "⊣": { "codepoints": [8867], "characters": "\u22A3" }, + "⥚": { "codepoints": [10586], "characters": "\u295A" }, + "⋋": { "codepoints": [8907], "characters": "\u22CB" }, + "⧏": { "codepoints": [10703], "characters": "\u29CF" }, + "⊲": { "codepoints": [8882], "characters": "\u22B2" }, + "⊴": { "codepoints": [8884], "characters": "\u22B4" }, + "⥑": { "codepoints": [10577], "characters": "\u2951" }, + "⥠": { "codepoints": [10592], "characters": "\u2960" }, + "⥘": { "codepoints": [10584], "characters": "\u2958" }, + "↿": { "codepoints": [8639], "characters": "\u21BF" }, + "⥒": { "codepoints": [10578], "characters": "\u2952" }, + "↼": { "codepoints": [8636], "characters": "\u21BC" }, + "⪋": { "codepoints": [10891], "characters": "\u2A8B" }, + "⋚": { "codepoints": [8922], "characters": "\u22DA" }, + "≤": { "codepoints": [8804], "characters": "\u2264" }, + "≦": { "codepoints": [8806], "characters": "\u2266" }, + "⩽": { "codepoints": [10877], "characters": "\u2A7D" }, + "⪨": { "codepoints": [10920], "characters": "\u2AA8" }, + "⩽": { "codepoints": [10877], "characters": "\u2A7D" }, + "⩿": { "codepoints": [10879], "characters": "\u2A7F" }, + "⪁": { "codepoints": [10881], "characters": "\u2A81" }, + "⪃": { "codepoints": [10883], "characters": "\u2A83" }, + "⋚︀": { "codepoints": [8922, 65024], "characters": "\u22DA\uFE00" }, + "⪓": { "codepoints": [10899], "characters": "\u2A93" }, + "⪅": { "codepoints": [10885], "characters": "\u2A85" }, + "⋖": { "codepoints": [8918], "characters": "\u22D6" }, + "⋚": { "codepoints": [8922], "characters": "\u22DA" }, + "⪋": { "codepoints": [10891], "characters": "\u2A8B" }, + "⋚": { "codepoints": [8922], "characters": "\u22DA" }, + "≦": { "codepoints": [8806], "characters": "\u2266" }, + "≶": { "codepoints": [8822], "characters": "\u2276" }, + "≶": { "codepoints": [8822], "characters": "\u2276" }, + "⪡": { "codepoints": [10913], "characters": "\u2AA1" }, + "≲": { "codepoints": [8818], "characters": "\u2272" }, + "⩽": { "codepoints": [10877], "characters": "\u2A7D" }, + "≲": { "codepoints": [8818], "characters": "\u2272" }, + "⥼": { "codepoints": [10620], "characters": "\u297C" }, + "⌊": { "codepoints": 
[8970], "characters": "\u230A" }, + "𝔏": { "codepoints": [120079], "characters": "\uD835\uDD0F" }, + "𝔩": { "codepoints": [120105], "characters": "\uD835\uDD29" }, + "≶": { "codepoints": [8822], "characters": "\u2276" }, + "⪑": { "codepoints": [10897], "characters": "\u2A91" }, + "⥢": { "codepoints": [10594], "characters": "\u2962" }, + "↽": { "codepoints": [8637], "characters": "\u21BD" }, + "↼": { "codepoints": [8636], "characters": "\u21BC" }, + "⥪": { "codepoints": [10602], "characters": "\u296A" }, + "▄": { "codepoints": [9604], "characters": "\u2584" }, + "Љ": { "codepoints": [1033], "characters": "\u0409" }, + "љ": { "codepoints": [1113], "characters": "\u0459" }, + "⇇": { "codepoints": [8647], "characters": "\u21C7" }, + "≪": { "codepoints": [8810], "characters": "\u226A" }, + "⋘": { "codepoints": [8920], "characters": "\u22D8" }, + "⌞": { "codepoints": [8990], "characters": "\u231E" }, + "⇚": { "codepoints": [8666], "characters": "\u21DA" }, + "⥫": { "codepoints": [10603], "characters": "\u296B" }, + "◺": { "codepoints": [9722], "characters": "\u25FA" }, + "Ŀ": { "codepoints": [319], "characters": "\u013F" }, + "ŀ": { "codepoints": [320], "characters": "\u0140" }, + "⎰": { "codepoints": [9136], "characters": "\u23B0" }, + "⎰": { "codepoints": [9136], "characters": "\u23B0" }, + "⪉": { "codepoints": [10889], "characters": "\u2A89" }, + "⪉": { "codepoints": [10889], "characters": "\u2A89" }, + "⪇": { "codepoints": [10887], "characters": "\u2A87" }, + "≨": { "codepoints": [8808], "characters": "\u2268" }, + "⪇": { "codepoints": [10887], "characters": "\u2A87" }, + "≨": { "codepoints": [8808], "characters": "\u2268" }, + "⋦": { "codepoints": [8934], "characters": "\u22E6" }, + "⟬": { "codepoints": [10220], "characters": "\u27EC" }, + "⇽": { "codepoints": [8701], "characters": "\u21FD" }, + "⟦": { "codepoints": [10214], "characters": "\u27E6" }, + "⟵": { "codepoints": [10229], "characters": "\u27F5" }, + "⟵": { "codepoints": [10229], "characters": "\u27F5" }, + "⟸": { "codepoints": [10232], "characters": "\u27F8" }, + "⟷": { "codepoints": [10231], "characters": "\u27F7" }, + "⟷": { "codepoints": [10231], "characters": "\u27F7" }, + "⟺": { "codepoints": [10234], "characters": "\u27FA" }, + "⟼": { "codepoints": [10236], "characters": "\u27FC" }, + "⟶": { "codepoints": [10230], "characters": "\u27F6" }, + "⟶": { "codepoints": [10230], "characters": "\u27F6" }, + "⟹": { "codepoints": [10233], "characters": "\u27F9" }, + "↫": { "codepoints": [8619], "characters": "\u21AB" }, + "↬": { "codepoints": [8620], "characters": "\u21AC" }, + "⦅": { "codepoints": [10629], "characters": "\u2985" }, + "𝕃": { "codepoints": [120131], "characters": "\uD835\uDD43" }, + "𝕝": { "codepoints": [120157], "characters": "\uD835\uDD5D" }, + "⨭": { "codepoints": [10797], "characters": "\u2A2D" }, + "⨴": { "codepoints": [10804], "characters": "\u2A34" }, + "∗": { "codepoints": [8727], "characters": "\u2217" }, + "_": { "codepoints": [95], "characters": "\u005F" }, + "↙": { "codepoints": [8601], "characters": "\u2199" }, + "↘": { "codepoints": [8600], "characters": "\u2198" }, + "◊": { "codepoints": [9674], "characters": "\u25CA" }, + "◊": { "codepoints": [9674], "characters": "\u25CA" }, + "⧫": { "codepoints": [10731], "characters": "\u29EB" }, + "(": { "codepoints": [40], "characters": "\u0028" }, + "⦓": { "codepoints": [10643], "characters": "\u2993" }, + "⇆": { "codepoints": [8646], "characters": "\u21C6" }, + "⌟": { "codepoints": [8991], "characters": "\u231F" }, + "⇋": { "codepoints": [8651], "characters": 
"\u21CB" }, + "⥭": { "codepoints": [10605], "characters": "\u296D" }, + "‎": { "codepoints": [8206], "characters": "\u200E" }, + "⊿": { "codepoints": [8895], "characters": "\u22BF" }, + "‹": { "codepoints": [8249], "characters": "\u2039" }, + "𝓁": { "codepoints": [120001], "characters": "\uD835\uDCC1" }, + "ℒ": { "codepoints": [8466], "characters": "\u2112" }, + "↰": { "codepoints": [8624], "characters": "\u21B0" }, + "↰": { "codepoints": [8624], "characters": "\u21B0" }, + "≲": { "codepoints": [8818], "characters": "\u2272" }, + "⪍": { "codepoints": [10893], "characters": "\u2A8D" }, + "⪏": { "codepoints": [10895], "characters": "\u2A8F" }, + "[": { "codepoints": [91], "characters": "\u005B" }, + "‘": { "codepoints": [8216], "characters": "\u2018" }, + "‚": { "codepoints": [8218], "characters": "\u201A" }, + "Ł": { "codepoints": [321], "characters": "\u0141" }, + "ł": { "codepoints": [322], "characters": "\u0142" }, + "⪦": { "codepoints": [10918], "characters": "\u2AA6" }, + "⩹": { "codepoints": [10873], "characters": "\u2A79" }, + "<": { "codepoints": [60], "characters": "\u003C" }, + "<": { "codepoints": [60], "characters": "\u003C" }, + "<": { "codepoints": [60], "characters": "\u003C" }, + "<": { "codepoints": [60], "characters": "\u003C" }, + "≪": { "codepoints": [8810], "characters": "\u226A" }, + "⋖": { "codepoints": [8918], "characters": "\u22D6" }, + "⋋": { "codepoints": [8907], "characters": "\u22CB" }, + "⋉": { "codepoints": [8905], "characters": "\u22C9" }, + "⥶": { "codepoints": [10614], "characters": "\u2976" }, + "⩻": { "codepoints": [10875], "characters": "\u2A7B" }, + "◃": { "codepoints": [9667], "characters": "\u25C3" }, + "⊴": { "codepoints": [8884], "characters": "\u22B4" }, + "◂": { "codepoints": [9666], "characters": "\u25C2" }, + "⦖": { "codepoints": [10646], "characters": "\u2996" }, + "⥊": { "codepoints": [10570], "characters": "\u294A" }, + "⥦": { "codepoints": [10598], "characters": "\u2966" }, + "≨︀": { "codepoints": [8808, 65024], "characters": "\u2268\uFE00" }, + "≨︀": { "codepoints": [8808, 65024], "characters": "\u2268\uFE00" }, + "¯": { "codepoints": [175], "characters": "\u00AF" }, + "¯": { "codepoints": [175], "characters": "\u00AF" }, + "♂": { "codepoints": [9794], "characters": "\u2642" }, + "✠": { "codepoints": [10016], "characters": "\u2720" }, + "✠": { "codepoints": [10016], "characters": "\u2720" }, + "⤅": { "codepoints": [10501], "characters": "\u2905" }, + "↦": { "codepoints": [8614], "characters": "\u21A6" }, + "↦": { "codepoints": [8614], "characters": "\u21A6" }, + "↧": { "codepoints": [8615], "characters": "\u21A7" }, + "↤": { "codepoints": [8612], "characters": "\u21A4" }, + "↥": { "codepoints": [8613], "characters": "\u21A5" }, + "▮": { "codepoints": [9646], "characters": "\u25AE" }, + "⨩": { "codepoints": [10793], "characters": "\u2A29" }, + "М": { "codepoints": [1052], "characters": "\u041C" }, + "м": { "codepoints": [1084], "characters": "\u043C" }, + "—": { "codepoints": [8212], "characters": "\u2014" }, + "∺": { "codepoints": [8762], "characters": "\u223A" }, + "∡": { "codepoints": [8737], "characters": "\u2221" }, + " ": { "codepoints": [8287], "characters": "\u205F" }, + "ℳ": { "codepoints": [8499], "characters": "\u2133" }, + "𝔐": { "codepoints": [120080], "characters": "\uD835\uDD10" }, + "𝔪": { "codepoints": [120106], "characters": "\uD835\uDD2A" }, + "℧": { "codepoints": [8487], "characters": "\u2127" }, + "µ": { "codepoints": [181], "characters": "\u00B5" }, + "µ": { "codepoints": [181], "characters": "\u00B5" }, + "*": { 
"codepoints": [42], "characters": "\u002A" }, + "⫰": { "codepoints": [10992], "characters": "\u2AF0" }, + "∣": { "codepoints": [8739], "characters": "\u2223" }, + "·": { "codepoints": [183], "characters": "\u00B7" }, + "·": { "codepoints": [183], "characters": "\u00B7" }, + "⊟": { "codepoints": [8863], "characters": "\u229F" }, + "−": { "codepoints": [8722], "characters": "\u2212" }, + "∸": { "codepoints": [8760], "characters": "\u2238" }, + "⨪": { "codepoints": [10794], "characters": "\u2A2A" }, + "∓": { "codepoints": [8723], "characters": "\u2213" }, + "⫛": { "codepoints": [10971], "characters": "\u2ADB" }, + "…": { "codepoints": [8230], "characters": "\u2026" }, + "∓": { "codepoints": [8723], "characters": "\u2213" }, + "⊧": { "codepoints": [8871], "characters": "\u22A7" }, + "𝕄": { "codepoints": [120132], "characters": "\uD835\uDD44" }, + "𝕞": { "codepoints": [120158], "characters": "\uD835\uDD5E" }, + "∓": { "codepoints": [8723], "characters": "\u2213" }, + "𝓂": { "codepoints": [120002], "characters": "\uD835\uDCC2" }, + "ℳ": { "codepoints": [8499], "characters": "\u2133" }, + "∾": { "codepoints": [8766], "characters": "\u223E" }, + "Μ": { "codepoints": [924], "characters": "\u039C" }, + "μ": { "codepoints": [956], "characters": "\u03BC" }, + "⊸": { "codepoints": [8888], "characters": "\u22B8" }, + "⊸": { "codepoints": [8888], "characters": "\u22B8" }, + "∇": { "codepoints": [8711], "characters": "\u2207" }, + "Ń": { "codepoints": [323], "characters": "\u0143" }, + "ń": { "codepoints": [324], "characters": "\u0144" }, + "∠⃒": { "codepoints": [8736, 8402], "characters": "\u2220\u20D2" }, + "≉": { "codepoints": [8777], "characters": "\u2249" }, + "⩰̸": { "codepoints": [10864, 824], "characters": "\u2A70\u0338" }, + "≋̸": { "codepoints": [8779, 824], "characters": "\u224B\u0338" }, + "ʼn": { "codepoints": [329], "characters": "\u0149" }, + "≉": { "codepoints": [8777], "characters": "\u2249" }, + "♮": { "codepoints": [9838], "characters": "\u266E" }, + "ℕ": { "codepoints": [8469], "characters": "\u2115" }, + "♮": { "codepoints": [9838], "characters": "\u266E" }, + " ": { "codepoints": [160], "characters": "\u00A0" }, + " ": { "codepoints": [160], "characters": "\u00A0" }, + "≎̸": { "codepoints": [8782, 824], "characters": "\u224E\u0338" }, + "≏̸": { "codepoints": [8783, 824], "characters": "\u224F\u0338" }, + "⩃": { "codepoints": [10819], "characters": "\u2A43" }, + "Ň": { "codepoints": [327], "characters": "\u0147" }, + "ň": { "codepoints": [328], "characters": "\u0148" }, + "Ņ": { "codepoints": [325], "characters": "\u0145" }, + "ņ": { "codepoints": [326], "characters": "\u0146" }, + "≇": { "codepoints": [8775], "characters": "\u2247" }, + "⩭̸": { "codepoints": [10861, 824], "characters": "\u2A6D\u0338" }, + "⩂": { "codepoints": [10818], "characters": "\u2A42" }, + "Н": { "codepoints": [1053], "characters": "\u041D" }, + "н": { "codepoints": [1085], "characters": "\u043D" }, + "–": { "codepoints": [8211], "characters": "\u2013" }, + "⤤": { "codepoints": [10532], "characters": "\u2924" }, + "↗": { "codepoints": [8599], "characters": "\u2197" }, + "⇗": { "codepoints": [8663], "characters": "\u21D7" }, + "↗": { "codepoints": [8599], "characters": "\u2197" }, + "≠": { "codepoints": [8800], "characters": "\u2260" }, + "≐̸": { "codepoints": [8784, 824], "characters": "\u2250\u0338" }, + "​": { "codepoints": [8203], "characters": "\u200B" }, + "​": { "codepoints": [8203], "characters": "\u200B" }, + "​": { "codepoints": [8203], "characters": "\u200B" }, + "​": { "codepoints": [8203], 
"characters": "\u200B" }, + "≢": { "codepoints": [8802], "characters": "\u2262" }, + "⤨": { "codepoints": [10536], "characters": "\u2928" }, + "≂̸": { "codepoints": [8770, 824], "characters": "\u2242\u0338" }, + "≫": { "codepoints": [8811], "characters": "\u226B" }, + "≪": { "codepoints": [8810], "characters": "\u226A" }, + " ": { "codepoints": [10], "characters": "\u000A" }, + "∄": { "codepoints": [8708], "characters": "\u2204" }, + "∄": { "codepoints": [8708], "characters": "\u2204" }, + "𝔑": { "codepoints": [120081], "characters": "\uD835\uDD11" }, + "𝔫": { "codepoints": [120107], "characters": "\uD835\uDD2B" }, + "≧̸": { "codepoints": [8807, 824], "characters": "\u2267\u0338" }, + "≱": { "codepoints": [8817], "characters": "\u2271" }, + "≱": { "codepoints": [8817], "characters": "\u2271" }, + "≧̸": { "codepoints": [8807, 824], "characters": "\u2267\u0338" }, + "⩾̸": { "codepoints": [10878, 824], "characters": "\u2A7E\u0338" }, + "⩾̸": { "codepoints": [10878, 824], "characters": "\u2A7E\u0338" }, + "⋙̸": { "codepoints": [8921, 824], "characters": "\u22D9\u0338" }, + "≵": { "codepoints": [8821], "characters": "\u2275" }, + "≫⃒": { "codepoints": [8811, 8402], "characters": "\u226B\u20D2" }, + "≯": { "codepoints": [8815], "characters": "\u226F" }, + "≯": { "codepoints": [8815], "characters": "\u226F" }, + "≫̸": { "codepoints": [8811, 824], "characters": "\u226B\u0338" }, + "↮": { "codepoints": [8622], "characters": "\u21AE" }, + "⇎": { "codepoints": [8654], "characters": "\u21CE" }, + "⫲": { "codepoints": [10994], "characters": "\u2AF2" }, + "∋": { "codepoints": [8715], "characters": "\u220B" }, + "⋼": { "codepoints": [8956], "characters": "\u22FC" }, + "⋺": { "codepoints": [8954], "characters": "\u22FA" }, + "∋": { "codepoints": [8715], "characters": "\u220B" }, + "Њ": { "codepoints": [1034], "characters": "\u040A" }, + "њ": { "codepoints": [1114], "characters": "\u045A" }, + "↚": { "codepoints": [8602], "characters": "\u219A" }, + "⇍": { "codepoints": [8653], "characters": "\u21CD" }, + "‥": { "codepoints": [8229], "characters": "\u2025" }, + "≦̸": { "codepoints": [8806, 824], "characters": "\u2266\u0338" }, + "≰": { "codepoints": [8816], "characters": "\u2270" }, + "↚": { "codepoints": [8602], "characters": "\u219A" }, + "⇍": { "codepoints": [8653], "characters": "\u21CD" }, + "↮": { "codepoints": [8622], "characters": "\u21AE" }, + "⇎": { "codepoints": [8654], "characters": "\u21CE" }, + "≰": { "codepoints": [8816], "characters": "\u2270" }, + "≦̸": { "codepoints": [8806, 824], "characters": "\u2266\u0338" }, + "⩽̸": { "codepoints": [10877, 824], "characters": "\u2A7D\u0338" }, + "⩽̸": { "codepoints": [10877, 824], "characters": "\u2A7D\u0338" }, + "≮": { "codepoints": [8814], "characters": "\u226E" }, + "⋘̸": { "codepoints": [8920, 824], "characters": "\u22D8\u0338" }, + "≴": { "codepoints": [8820], "characters": "\u2274" }, + "≪⃒": { "codepoints": [8810, 8402], "characters": "\u226A\u20D2" }, + "≮": { "codepoints": [8814], "characters": "\u226E" }, + "⋪": { "codepoints": [8938], "characters": "\u22EA" }, + "⋬": { "codepoints": [8940], "characters": "\u22EC" }, + "≪̸": { "codepoints": [8810, 824], "characters": "\u226A\u0338" }, + "∤": { "codepoints": [8740], "characters": "\u2224" }, + "⁠": { "codepoints": [8288], "characters": "\u2060" }, + " ": { "codepoints": [160], "characters": "\u00A0" }, + "𝕟": { "codepoints": [120159], "characters": "\uD835\uDD5F" }, + "ℕ": { "codepoints": [8469], "characters": "\u2115" }, + "⫬": { "codepoints": [10988], "characters": "\u2AEC" }, + "¬": { 
"codepoints": [172], "characters": "\u00AC" }, + "¬": { "codepoints": [172], "characters": "\u00AC" }, + "≢": { "codepoints": [8802], "characters": "\u2262" }, + "≭": { "codepoints": [8813], "characters": "\u226D" }, + "∦": { "codepoints": [8742], "characters": "\u2226" }, + "∉": { "codepoints": [8713], "characters": "\u2209" }, + "≠": { "codepoints": [8800], "characters": "\u2260" }, + "≂̸": { "codepoints": [8770, 824], "characters": "\u2242\u0338" }, + "∄": { "codepoints": [8708], "characters": "\u2204" }, + "≯": { "codepoints": [8815], "characters": "\u226F" }, + "≱": { "codepoints": [8817], "characters": "\u2271" }, + "≧̸": { "codepoints": [8807, 824], "characters": "\u2267\u0338" }, + "≫̸": { "codepoints": [8811, 824], "characters": "\u226B\u0338" }, + "≹": { "codepoints": [8825], "characters": "\u2279" }, + "⩾̸": { "codepoints": [10878, 824], "characters": "\u2A7E\u0338" }, + "≵": { "codepoints": [8821], "characters": "\u2275" }, + "≎̸": { "codepoints": [8782, 824], "characters": "\u224E\u0338" }, + "≏̸": { "codepoints": [8783, 824], "characters": "\u224F\u0338" }, + "∉": { "codepoints": [8713], "characters": "\u2209" }, + "⋵̸": { "codepoints": [8949, 824], "characters": "\u22F5\u0338" }, + "⋹̸": { "codepoints": [8953, 824], "characters": "\u22F9\u0338" }, + "∉": { "codepoints": [8713], "characters": "\u2209" }, + "⋷": { "codepoints": [8951], "characters": "\u22F7" }, + "⋶": { "codepoints": [8950], "characters": "\u22F6" }, + "⧏̸": { "codepoints": [10703, 824], "characters": "\u29CF\u0338" }, + "⋪": { "codepoints": [8938], "characters": "\u22EA" }, + "⋬": { "codepoints": [8940], "characters": "\u22EC" }, + "≮": { "codepoints": [8814], "characters": "\u226E" }, + "≰": { "codepoints": [8816], "characters": "\u2270" }, + "≸": { "codepoints": [8824], "characters": "\u2278" }, + "≪̸": { "codepoints": [8810, 824], "characters": "\u226A\u0338" }, + "⩽̸": { "codepoints": [10877, 824], "characters": "\u2A7D\u0338" }, + "≴": { "codepoints": [8820], "characters": "\u2274" }, + "⪢̸": { "codepoints": [10914, 824], "characters": "\u2AA2\u0338" }, + "⪡̸": { "codepoints": [10913, 824], "characters": "\u2AA1\u0338" }, + "∌": { "codepoints": [8716], "characters": "\u220C" }, + "∌": { "codepoints": [8716], "characters": "\u220C" }, + "⋾": { "codepoints": [8958], "characters": "\u22FE" }, + "⋽": { "codepoints": [8957], "characters": "\u22FD" }, + "⊀": { "codepoints": [8832], "characters": "\u2280" }, + "⪯̸": { "codepoints": [10927, 824], "characters": "\u2AAF\u0338" }, + "⋠": { "codepoints": [8928], "characters": "\u22E0" }, + "∌": { "codepoints": [8716], "characters": "\u220C" }, + "⧐̸": { "codepoints": [10704, 824], "characters": "\u29D0\u0338" }, + "⋫": { "codepoints": [8939], "characters": "\u22EB" }, + "⋭": { "codepoints": [8941], "characters": "\u22ED" }, + "⊏̸": { "codepoints": [8847, 824], "characters": "\u228F\u0338" }, + "⋢": { "codepoints": [8930], "characters": "\u22E2" }, + "⊐̸": { "codepoints": [8848, 824], "characters": "\u2290\u0338" }, + "⋣": { "codepoints": [8931], "characters": "\u22E3" }, + "⊂⃒": { "codepoints": [8834, 8402], "characters": "\u2282\u20D2" }, + "⊈": { "codepoints": [8840], "characters": "\u2288" }, + "⊁": { "codepoints": [8833], "characters": "\u2281" }, + "⪰̸": { "codepoints": [10928, 824], "characters": "\u2AB0\u0338" }, + "⋡": { "codepoints": [8929], "characters": "\u22E1" }, + "≿̸": { "codepoints": [8831, 824], "characters": "\u227F\u0338" }, + "⊃⃒": { "codepoints": [8835, 8402], "characters": "\u2283\u20D2" }, + "⊉": { "codepoints": [8841], "characters": "\u2289" 
}, + "≁": { "codepoints": [8769], "characters": "\u2241" }, + "≄": { "codepoints": [8772], "characters": "\u2244" }, + "≇": { "codepoints": [8775], "characters": "\u2247" }, + "≉": { "codepoints": [8777], "characters": "\u2249" }, + "∤": { "codepoints": [8740], "characters": "\u2224" }, + "∦": { "codepoints": [8742], "characters": "\u2226" }, + "∦": { "codepoints": [8742], "characters": "\u2226" }, + "⫽⃥": { "codepoints": [11005, 8421], "characters": "\u2AFD\u20E5" }, + "∂̸": { "codepoints": [8706, 824], "characters": "\u2202\u0338" }, + "⨔": { "codepoints": [10772], "characters": "\u2A14" }, + "⊀": { "codepoints": [8832], "characters": "\u2280" }, + "⋠": { "codepoints": [8928], "characters": "\u22E0" }, + "⊀": { "codepoints": [8832], "characters": "\u2280" }, + "⪯̸": { "codepoints": [10927, 824], "characters": "\u2AAF\u0338" }, + "⪯̸": { "codepoints": [10927, 824], "characters": "\u2AAF\u0338" }, + "⤳̸": { "codepoints": [10547, 824], "characters": "\u2933\u0338" }, + "↛": { "codepoints": [8603], "characters": "\u219B" }, + "⇏": { "codepoints": [8655], "characters": "\u21CF" }, + "↝̸": { "codepoints": [8605, 824], "characters": "\u219D\u0338" }, + "↛": { "codepoints": [8603], "characters": "\u219B" }, + "⇏": { "codepoints": [8655], "characters": "\u21CF" }, + "⋫": { "codepoints": [8939], "characters": "\u22EB" }, + "⋭": { "codepoints": [8941], "characters": "\u22ED" }, + "⊁": { "codepoints": [8833], "characters": "\u2281" }, + "⋡": { "codepoints": [8929], "characters": "\u22E1" }, + "⪰̸": { "codepoints": [10928, 824], "characters": "\u2AB0\u0338" }, + "𝒩": { "codepoints": [119977], "characters": "\uD835\uDCA9" }, + "𝓃": { "codepoints": [120003], "characters": "\uD835\uDCC3" }, + "∤": { "codepoints": [8740], "characters": "\u2224" }, + "∦": { "codepoints": [8742], "characters": "\u2226" }, + "≁": { "codepoints": [8769], "characters": "\u2241" }, + "≄": { "codepoints": [8772], "characters": "\u2244" }, + "≄": { "codepoints": [8772], "characters": "\u2244" }, + "∤": { "codepoints": [8740], "characters": "\u2224" }, + "∦": { "codepoints": [8742], "characters": "\u2226" }, + "⋢": { "codepoints": [8930], "characters": "\u22E2" }, + "⋣": { "codepoints": [8931], "characters": "\u22E3" }, + "⊄": { "codepoints": [8836], "characters": "\u2284" }, + "⫅̸": { "codepoints": [10949, 824], "characters": "\u2AC5\u0338" }, + "⊈": { "codepoints": [8840], "characters": "\u2288" }, + "⊂⃒": { "codepoints": [8834, 8402], "characters": "\u2282\u20D2" }, + "⊈": { "codepoints": [8840], "characters": "\u2288" }, + "⫅̸": { "codepoints": [10949, 824], "characters": "\u2AC5\u0338" }, + "⊁": { "codepoints": [8833], "characters": "\u2281" }, + "⪰̸": { "codepoints": [10928, 824], "characters": "\u2AB0\u0338" }, + "⊅": { "codepoints": [8837], "characters": "\u2285" }, + "⫆̸": { "codepoints": [10950, 824], "characters": "\u2AC6\u0338" }, + "⊉": { "codepoints": [8841], "characters": "\u2289" }, + "⊃⃒": { "codepoints": [8835, 8402], "characters": "\u2283\u20D2" }, + "⊉": { "codepoints": [8841], "characters": "\u2289" }, + "⫆̸": { "codepoints": [10950, 824], "characters": "\u2AC6\u0338" }, + "≹": { "codepoints": [8825], "characters": "\u2279" }, + "Ñ": { "codepoints": [209], "characters": "\u00D1" }, + "Ñ": { "codepoints": [209], "characters": "\u00D1" }, + "ñ": { "codepoints": [241], "characters": "\u00F1" }, + "ñ": { "codepoints": [241], "characters": "\u00F1" }, + "≸": { "codepoints": [8824], "characters": "\u2278" }, + "⋪": { "codepoints": [8938], "characters": "\u22EA" }, + "⋬": { "codepoints": [8940], "characters": 
"\u22EC" }, + "⋫": { "codepoints": [8939], "characters": "\u22EB" }, + "⋭": { "codepoints": [8941], "characters": "\u22ED" }, + "Ν": { "codepoints": [925], "characters": "\u039D" }, + "ν": { "codepoints": [957], "characters": "\u03BD" }, + "#": { "codepoints": [35], "characters": "\u0023" }, + "№": { "codepoints": [8470], "characters": "\u2116" }, + " ": { "codepoints": [8199], "characters": "\u2007" }, + "≍⃒": { "codepoints": [8781, 8402], "characters": "\u224D\u20D2" }, + "⊬": { "codepoints": [8876], "characters": "\u22AC" }, + "⊭": { "codepoints": [8877], "characters": "\u22AD" }, + "⊮": { "codepoints": [8878], "characters": "\u22AE" }, + "⊯": { "codepoints": [8879], "characters": "\u22AF" }, + "≥⃒": { "codepoints": [8805, 8402], "characters": "\u2265\u20D2" }, + ">⃒": { "codepoints": [62, 8402], "characters": "\u003E\u20D2" }, + "⤄": { "codepoints": [10500], "characters": "\u2904" }, + "⧞": { "codepoints": [10718], "characters": "\u29DE" }, + "⤂": { "codepoints": [10498], "characters": "\u2902" }, + "≤⃒": { "codepoints": [8804, 8402], "characters": "\u2264\u20D2" }, + "<⃒": { "codepoints": [60, 8402], "characters": "\u003C\u20D2" }, + "⊴⃒": { "codepoints": [8884, 8402], "characters": "\u22B4\u20D2" }, + "⤃": { "codepoints": [10499], "characters": "\u2903" }, + "⊵⃒": { "codepoints": [8885, 8402], "characters": "\u22B5\u20D2" }, + "∼⃒": { "codepoints": [8764, 8402], "characters": "\u223C\u20D2" }, + "⤣": { "codepoints": [10531], "characters": "\u2923" }, + "↖": { "codepoints": [8598], "characters": "\u2196" }, + "⇖": { "codepoints": [8662], "characters": "\u21D6" }, + "↖": { "codepoints": [8598], "characters": "\u2196" }, + "⤧": { "codepoints": [10535], "characters": "\u2927" }, + "Ó": { "codepoints": [211], "characters": "\u00D3" }, + "Ó": { "codepoints": [211], "characters": "\u00D3" }, + "ó": { "codepoints": [243], "characters": "\u00F3" }, + "ó": { "codepoints": [243], "characters": "\u00F3" }, + "⊛": { "codepoints": [8859], "characters": "\u229B" }, + "Ô": { "codepoints": [212], "characters": "\u00D4" }, + "Ô": { "codepoints": [212], "characters": "\u00D4" }, + "ô": { "codepoints": [244], "characters": "\u00F4" }, + "ô": { "codepoints": [244], "characters": "\u00F4" }, + "⊚": { "codepoints": [8858], "characters": "\u229A" }, + "О": { "codepoints": [1054], "characters": "\u041E" }, + "о": { "codepoints": [1086], "characters": "\u043E" }, + "⊝": { "codepoints": [8861], "characters": "\u229D" }, + "Ő": { "codepoints": [336], "characters": "\u0150" }, + "ő": { "codepoints": [337], "characters": "\u0151" }, + "⨸": { "codepoints": [10808], "characters": "\u2A38" }, + "⊙": { "codepoints": [8857], "characters": "\u2299" }, + "⦼": { "codepoints": [10684], "characters": "\u29BC" }, + "Œ": { "codepoints": [338], "characters": "\u0152" }, + "œ": { "codepoints": [339], "characters": "\u0153" }, + "⦿": { "codepoints": [10687], "characters": "\u29BF" }, + "𝔒": { "codepoints": [120082], "characters": "\uD835\uDD12" }, + "𝔬": { "codepoints": [120108], "characters": "\uD835\uDD2C" }, + "˛": { "codepoints": [731], "characters": "\u02DB" }, + "Ò": { "codepoints": [210], "characters": "\u00D2" }, + "Ò": { "codepoints": [210], "characters": "\u00D2" }, + "ò": { "codepoints": [242], "characters": "\u00F2" }, + "ò": { "codepoints": [242], "characters": "\u00F2" }, + "⧁": { "codepoints": [10689], "characters": "\u29C1" }, + "⦵": { "codepoints": [10677], "characters": "\u29B5" }, + "Ω": { "codepoints": [937], "characters": "\u03A9" }, + "∮": { "codepoints": [8750], "characters": "\u222E" }, + "↺": { 
"codepoints": [8634], "characters": "\u21BA" }, + "⦾": { "codepoints": [10686], "characters": "\u29BE" }, + "⦻": { "codepoints": [10683], "characters": "\u29BB" }, + "‾": { "codepoints": [8254], "characters": "\u203E" }, + "⧀": { "codepoints": [10688], "characters": "\u29C0" }, + "Ō": { "codepoints": [332], "characters": "\u014C" }, + "ō": { "codepoints": [333], "characters": "\u014D" }, + "Ω": { "codepoints": [937], "characters": "\u03A9" }, + "ω": { "codepoints": [969], "characters": "\u03C9" }, + "Ο": { "codepoints": [927], "characters": "\u039F" }, + "ο": { "codepoints": [959], "characters": "\u03BF" }, + "⦶": { "codepoints": [10678], "characters": "\u29B6" }, + "⊖": { "codepoints": [8854], "characters": "\u2296" }, + "𝕆": { "codepoints": [120134], "characters": "\uD835\uDD46" }, + "𝕠": { "codepoints": [120160], "characters": "\uD835\uDD60" }, + "⦷": { "codepoints": [10679], "characters": "\u29B7" }, + "“": { "codepoints": [8220], "characters": "\u201C" }, + "‘": { "codepoints": [8216], "characters": "\u2018" }, + "⦹": { "codepoints": [10681], "characters": "\u29B9" }, + "⊕": { "codepoints": [8853], "characters": "\u2295" }, + "↻": { "codepoints": [8635], "characters": "\u21BB" }, + "⩔": { "codepoints": [10836], "characters": "\u2A54" }, + "∨": { "codepoints": [8744], "characters": "\u2228" }, + "⩝": { "codepoints": [10845], "characters": "\u2A5D" }, + "ℴ": { "codepoints": [8500], "characters": "\u2134" }, + "ℴ": { "codepoints": [8500], "characters": "\u2134" }, + "ª": { "codepoints": [170], "characters": "\u00AA" }, + "ª": { "codepoints": [170], "characters": "\u00AA" }, + "º": { "codepoints": [186], "characters": "\u00BA" }, + "º": { "codepoints": [186], "characters": "\u00BA" }, + "⊶": { "codepoints": [8886], "characters": "\u22B6" }, + "⩖": { "codepoints": [10838], "characters": "\u2A56" }, + "⩗": { "codepoints": [10839], "characters": "\u2A57" }, + "⩛": { "codepoints": [10843], "characters": "\u2A5B" }, + "Ⓢ": { "codepoints": [9416], "characters": "\u24C8" }, + "𝒪": { "codepoints": [119978], "characters": "\uD835\uDCAA" }, + "ℴ": { "codepoints": [8500], "characters": "\u2134" }, + "Ø": { "codepoints": [216], "characters": "\u00D8" }, + "Ø": { "codepoints": [216], "characters": "\u00D8" }, + "ø": { "codepoints": [248], "characters": "\u00F8" }, + "ø": { "codepoints": [248], "characters": "\u00F8" }, + "⊘": { "codepoints": [8856], "characters": "\u2298" }, + "Õ": { "codepoints": [213], "characters": "\u00D5" }, + "Õ": { "codepoints": [213], "characters": "\u00D5" }, + "õ": { "codepoints": [245], "characters": "\u00F5" }, + "õ": { "codepoints": [245], "characters": "\u00F5" }, + "⨶": { "codepoints": [10806], "characters": "\u2A36" }, + "⨷": { "codepoints": [10807], "characters": "\u2A37" }, + "⊗": { "codepoints": [8855], "characters": "\u2297" }, + "Ö": { "codepoints": [214], "characters": "\u00D6" }, + "Ö": { "codepoints": [214], "characters": "\u00D6" }, + "ö": { "codepoints": [246], "characters": "\u00F6" }, + "ö": { "codepoints": [246], "characters": "\u00F6" }, + "⌽": { "codepoints": [9021], "characters": "\u233D" }, + "‾": { "codepoints": [8254], "characters": "\u203E" }, + "⏞": { "codepoints": [9182], "characters": "\u23DE" }, + "⎴": { "codepoints": [9140], "characters": "\u23B4" }, + "⏜": { "codepoints": [9180], "characters": "\u23DC" }, + "¶": { "codepoints": [182], "characters": "\u00B6" }, + "¶": { "codepoints": [182], "characters": "\u00B6" }, + "∥": { "codepoints": [8741], "characters": "\u2225" }, + "∥": { "codepoints": [8741], "characters": "\u2225" }, + "⫳": { 
"codepoints": [10995], "characters": "\u2AF3" }, + "⫽": { "codepoints": [11005], "characters": "\u2AFD" }, + "∂": { "codepoints": [8706], "characters": "\u2202" }, + "∂": { "codepoints": [8706], "characters": "\u2202" }, + "П": { "codepoints": [1055], "characters": "\u041F" }, + "п": { "codepoints": [1087], "characters": "\u043F" }, + "%": { "codepoints": [37], "characters": "\u0025" }, + ".": { "codepoints": [46], "characters": "\u002E" }, + "‰": { "codepoints": [8240], "characters": "\u2030" }, + "⊥": { "codepoints": [8869], "characters": "\u22A5" }, + "‱": { "codepoints": [8241], "characters": "\u2031" }, + "𝔓": { "codepoints": [120083], "characters": "\uD835\uDD13" }, + "𝔭": { "codepoints": [120109], "characters": "\uD835\uDD2D" }, + "Φ": { "codepoints": [934], "characters": "\u03A6" }, + "φ": { "codepoints": [966], "characters": "\u03C6" }, + "ϕ": { "codepoints": [981], "characters": "\u03D5" }, + "ℳ": { "codepoints": [8499], "characters": "\u2133" }, + "☎": { "codepoints": [9742], "characters": "\u260E" }, + "Π": { "codepoints": [928], "characters": "\u03A0" }, + "π": { "codepoints": [960], "characters": "\u03C0" }, + "⋔": { "codepoints": [8916], "characters": "\u22D4" }, + "ϖ": { "codepoints": [982], "characters": "\u03D6" }, + "ℏ": { "codepoints": [8463], "characters": "\u210F" }, + "ℎ": { "codepoints": [8462], "characters": "\u210E" }, + "ℏ": { "codepoints": [8463], "characters": "\u210F" }, + "⨣": { "codepoints": [10787], "characters": "\u2A23" }, + "⊞": { "codepoints": [8862], "characters": "\u229E" }, + "⨢": { "codepoints": [10786], "characters": "\u2A22" }, + "+": { "codepoints": [43], "characters": "\u002B" }, + "∔": { "codepoints": [8724], "characters": "\u2214" }, + "⨥": { "codepoints": [10789], "characters": "\u2A25" }, + "⩲": { "codepoints": [10866], "characters": "\u2A72" }, + "±": { "codepoints": [177], "characters": "\u00B1" }, + "±": { "codepoints": [177], "characters": "\u00B1" }, + "±": { "codepoints": [177], "characters": "\u00B1" }, + "⨦": { "codepoints": [10790], "characters": "\u2A26" }, + "⨧": { "codepoints": [10791], "characters": "\u2A27" }, + "±": { "codepoints": [177], "characters": "\u00B1" }, + "ℌ": { "codepoints": [8460], "characters": "\u210C" }, + "⨕": { "codepoints": [10773], "characters": "\u2A15" }, + "𝕡": { "codepoints": [120161], "characters": "\uD835\uDD61" }, + "ℙ": { "codepoints": [8473], "characters": "\u2119" }, + "£": { "codepoints": [163], "characters": "\u00A3" }, + "£": { "codepoints": [163], "characters": "\u00A3" }, + "⪷": { "codepoints": [10935], "characters": "\u2AB7" }, + "⪻": { "codepoints": [10939], "characters": "\u2ABB" }, + "≺": { "codepoints": [8826], "characters": "\u227A" }, + "≼": { "codepoints": [8828], "characters": "\u227C" }, + "⪷": { "codepoints": [10935], "characters": "\u2AB7" }, + "≺": { "codepoints": [8826], "characters": "\u227A" }, + "≼": { "codepoints": [8828], "characters": "\u227C" }, + "≺": { "codepoints": [8826], "characters": "\u227A" }, + "⪯": { "codepoints": [10927], "characters": "\u2AAF" }, + "≼": { "codepoints": [8828], "characters": "\u227C" }, + "≾": { "codepoints": [8830], "characters": "\u227E" }, + "⪯": { "codepoints": [10927], "characters": "\u2AAF" }, + "⪹": { "codepoints": [10937], "characters": "\u2AB9" }, + "⪵": { "codepoints": [10933], "characters": "\u2AB5" }, + "⋨": { "codepoints": [8936], "characters": "\u22E8" }, + "⪯": { "codepoints": [10927], "characters": "\u2AAF" }, + "⪳": { "codepoints": [10931], "characters": "\u2AB3" }, + "≾": { "codepoints": [8830], "characters": "\u227E" }, + 
"′": { "codepoints": [8242], "characters": "\u2032" }, + "″": { "codepoints": [8243], "characters": "\u2033" }, + "ℙ": { "codepoints": [8473], "characters": "\u2119" }, + "⪹": { "codepoints": [10937], "characters": "\u2AB9" }, + "⪵": { "codepoints": [10933], "characters": "\u2AB5" }, + "⋨": { "codepoints": [8936], "characters": "\u22E8" }, + "∏": { "codepoints": [8719], "characters": "\u220F" }, + "∏": { "codepoints": [8719], "characters": "\u220F" }, + "⌮": { "codepoints": [9006], "characters": "\u232E" }, + "⌒": { "codepoints": [8978], "characters": "\u2312" }, + "⌓": { "codepoints": [8979], "characters": "\u2313" }, + "∝": { "codepoints": [8733], "characters": "\u221D" }, + "∝": { "codepoints": [8733], "characters": "\u221D" }, + "∷": { "codepoints": [8759], "characters": "\u2237" }, + "∝": { "codepoints": [8733], "characters": "\u221D" }, + "≾": { "codepoints": [8830], "characters": "\u227E" }, + "⊰": { "codepoints": [8880], "characters": "\u22B0" }, + "𝒫": { "codepoints": [119979], "characters": "\uD835\uDCAB" }, + "𝓅": { "codepoints": [120005], "characters": "\uD835\uDCC5" }, + "Ψ": { "codepoints": [936], "characters": "\u03A8" }, + "ψ": { "codepoints": [968], "characters": "\u03C8" }, + " ": { "codepoints": [8200], "characters": "\u2008" }, + "𝔔": { "codepoints": [120084], "characters": "\uD835\uDD14" }, + "𝔮": { "codepoints": [120110], "characters": "\uD835\uDD2E" }, + "⨌": { "codepoints": [10764], "characters": "\u2A0C" }, + "𝕢": { "codepoints": [120162], "characters": "\uD835\uDD62" }, + "ℚ": { "codepoints": [8474], "characters": "\u211A" }, + "⁗": { "codepoints": [8279], "characters": "\u2057" }, + "𝒬": { "codepoints": [119980], "characters": "\uD835\uDCAC" }, + "𝓆": { "codepoints": [120006], "characters": "\uD835\uDCC6" }, + "ℍ": { "codepoints": [8461], "characters": "\u210D" }, + "⨖": { "codepoints": [10774], "characters": "\u2A16" }, + "?": { "codepoints": [63], "characters": "\u003F" }, + "≟": { "codepoints": [8799], "characters": "\u225F" }, + """: { "codepoints": [34], "characters": "\u0022" }, + """: { "codepoints": [34], "characters": "\u0022" }, + """: { "codepoints": [34], "characters": "\u0022" }, + """: { "codepoints": [34], "characters": "\u0022" }, + "⇛": { "codepoints": [8667], "characters": "\u21DB" }, + "∽̱": { "codepoints": [8765, 817], "characters": "\u223D\u0331" }, + "Ŕ": { "codepoints": [340], "characters": "\u0154" }, + "ŕ": { "codepoints": [341], "characters": "\u0155" }, + "√": { "codepoints": [8730], "characters": "\u221A" }, + "⦳": { "codepoints": [10675], "characters": "\u29B3" }, + "⟩": { "codepoints": [10217], "characters": "\u27E9" }, + "⟫": { "codepoints": [10219], "characters": "\u27EB" }, + "⦒": { "codepoints": [10642], "characters": "\u2992" }, + "⦥": { "codepoints": [10661], "characters": "\u29A5" }, + "⟩": { "codepoints": [10217], "characters": "\u27E9" }, + "»": { "codepoints": [187], "characters": "\u00BB" }, + "»": { "codepoints": [187], "characters": "\u00BB" }, + "⥵": { "codepoints": [10613], "characters": "\u2975" }, + "⇥": { "codepoints": [8677], "characters": "\u21E5" }, + "⤠": { "codepoints": [10528], "characters": "\u2920" }, + "⤳": { "codepoints": [10547], "characters": "\u2933" }, + "→": { "codepoints": [8594], "characters": "\u2192" }, + "↠": { "codepoints": [8608], "characters": "\u21A0" }, + "⇒": { "codepoints": [8658], "characters": "\u21D2" }, + "⤞": { "codepoints": [10526], "characters": "\u291E" }, + "↪": { "codepoints": [8618], "characters": "\u21AA" }, + "↬": { "codepoints": [8620], "characters": "\u21AC" }, + "⥅": { 
"codepoints": [10565], "characters": "\u2945" }, + "⥴": { "codepoints": [10612], "characters": "\u2974" }, + "⤖": { "codepoints": [10518], "characters": "\u2916" }, + "↣": { "codepoints": [8611], "characters": "\u21A3" }, + "↝": { "codepoints": [8605], "characters": "\u219D" }, + "⤚": { "codepoints": [10522], "characters": "\u291A" }, + "⤜": { "codepoints": [10524], "characters": "\u291C" }, + "∶": { "codepoints": [8758], "characters": "\u2236" }, + "ℚ": { "codepoints": [8474], "characters": "\u211A" }, + "⤍": { "codepoints": [10509], "characters": "\u290D" }, + "⤏": { "codepoints": [10511], "characters": "\u290F" }, + "⤐": { "codepoints": [10512], "characters": "\u2910" }, + "❳": { "codepoints": [10099], "characters": "\u2773" }, + "}": { "codepoints": [125], "characters": "\u007D" }, + "]": { "codepoints": [93], "characters": "\u005D" }, + "⦌": { "codepoints": [10636], "characters": "\u298C" }, + "⦎": { "codepoints": [10638], "characters": "\u298E" }, + "⦐": { "codepoints": [10640], "characters": "\u2990" }, + "Ř": { "codepoints": [344], "characters": "\u0158" }, + "ř": { "codepoints": [345], "characters": "\u0159" }, + "Ŗ": { "codepoints": [342], "characters": "\u0156" }, + "ŗ": { "codepoints": [343], "characters": "\u0157" }, + "⌉": { "codepoints": [8969], "characters": "\u2309" }, + "}": { "codepoints": [125], "characters": "\u007D" }, + "Р": { "codepoints": [1056], "characters": "\u0420" }, + "р": { "codepoints": [1088], "characters": "\u0440" }, + "⤷": { "codepoints": [10551], "characters": "\u2937" }, + "⥩": { "codepoints": [10601], "characters": "\u2969" }, + "”": { "codepoints": [8221], "characters": "\u201D" }, + "”": { "codepoints": [8221], "characters": "\u201D" }, + "↳": { "codepoints": [8627], "characters": "\u21B3" }, + "ℜ": { "codepoints": [8476], "characters": "\u211C" }, + "ℛ": { "codepoints": [8475], "characters": "\u211B" }, + "ℜ": { "codepoints": [8476], "characters": "\u211C" }, + "ℝ": { "codepoints": [8477], "characters": "\u211D" }, + "ℜ": { "codepoints": [8476], "characters": "\u211C" }, + "▭": { "codepoints": [9645], "characters": "\u25AD" }, + "®": { "codepoints": [174], "characters": "\u00AE" }, + "®": { "codepoints": [174], "characters": "\u00AE" }, + "®": { "codepoints": [174], "characters": "\u00AE" }, + "®": { "codepoints": [174], "characters": "\u00AE" }, + "∋": { "codepoints": [8715], "characters": "\u220B" }, + "⇋": { "codepoints": [8651], "characters": "\u21CB" }, + "⥯": { "codepoints": [10607], "characters": "\u296F" }, + "⥽": { "codepoints": [10621], "characters": "\u297D" }, + "⌋": { "codepoints": [8971], "characters": "\u230B" }, + "𝔯": { "codepoints": [120111], "characters": "\uD835\uDD2F" }, + "ℜ": { "codepoints": [8476], "characters": "\u211C" }, + "⥤": { "codepoints": [10596], "characters": "\u2964" }, + "⇁": { "codepoints": [8641], "characters": "\u21C1" }, + "⇀": { "codepoints": [8640], "characters": "\u21C0" }, + "⥬": { "codepoints": [10604], "characters": "\u296C" }, + "Ρ": { "codepoints": [929], "characters": "\u03A1" }, + "ρ": { "codepoints": [961], "characters": "\u03C1" }, + "ϱ": { "codepoints": [1009], "characters": "\u03F1" }, + "⟩": { "codepoints": [10217], "characters": "\u27E9" }, + "⇥": { "codepoints": [8677], "characters": "\u21E5" }, + "→": { "codepoints": [8594], "characters": "\u2192" }, + "→": { "codepoints": [8594], "characters": "\u2192" }, + "⇒": { "codepoints": [8658], "characters": "\u21D2" }, + "⇄": { "codepoints": [8644], "characters": "\u21C4" }, + "↣": { "codepoints": [8611], "characters": "\u21A3" }, + "⌉": { 
"codepoints": [8969], "characters": "\u2309" }, + "⟧": { "codepoints": [10215], "characters": "\u27E7" }, + "⥝": { "codepoints": [10589], "characters": "\u295D" }, + "⥕": { "codepoints": [10581], "characters": "\u2955" }, + "⇂": { "codepoints": [8642], "characters": "\u21C2" }, + "⌋": { "codepoints": [8971], "characters": "\u230B" }, + "⇁": { "codepoints": [8641], "characters": "\u21C1" }, + "⇀": { "codepoints": [8640], "characters": "\u21C0" }, + "⇄": { "codepoints": [8644], "characters": "\u21C4" }, + "⇌": { "codepoints": [8652], "characters": "\u21CC" }, + "⇉": { "codepoints": [8649], "characters": "\u21C9" }, + "↝": { "codepoints": [8605], "characters": "\u219D" }, + "↦": { "codepoints": [8614], "characters": "\u21A6" }, + "⊢": { "codepoints": [8866], "characters": "\u22A2" }, + "⥛": { "codepoints": [10587], "characters": "\u295B" }, + "⋌": { "codepoints": [8908], "characters": "\u22CC" }, + "⧐": { "codepoints": [10704], "characters": "\u29D0" }, + "⊳": { "codepoints": [8883], "characters": "\u22B3" }, + "⊵": { "codepoints": [8885], "characters": "\u22B5" }, + "⥏": { "codepoints": [10575], "characters": "\u294F" }, + "⥜": { "codepoints": [10588], "characters": "\u295C" }, + "⥔": { "codepoints": [10580], "characters": "\u2954" }, + "↾": { "codepoints": [8638], "characters": "\u21BE" }, + "⥓": { "codepoints": [10579], "characters": "\u2953" }, + "⇀": { "codepoints": [8640], "characters": "\u21C0" }, + "˚": { "codepoints": [730], "characters": "\u02DA" }, + "≓": { "codepoints": [8787], "characters": "\u2253" }, + "⇄": { "codepoints": [8644], "characters": "\u21C4" }, + "⇌": { "codepoints": [8652], "characters": "\u21CC" }, + "‏": { "codepoints": [8207], "characters": "\u200F" }, + "⎱": { "codepoints": [9137], "characters": "\u23B1" }, + "⎱": { "codepoints": [9137], "characters": "\u23B1" }, + "⫮": { "codepoints": [10990], "characters": "\u2AEE" }, + "⟭": { "codepoints": [10221], "characters": "\u27ED" }, + "⇾": { "codepoints": [8702], "characters": "\u21FE" }, + "⟧": { "codepoints": [10215], "characters": "\u27E7" }, + "⦆": { "codepoints": [10630], "characters": "\u2986" }, + "𝕣": { "codepoints": [120163], "characters": "\uD835\uDD63" }, + "ℝ": { "codepoints": [8477], "characters": "\u211D" }, + "⨮": { "codepoints": [10798], "characters": "\u2A2E" }, + "⨵": { "codepoints": [10805], "characters": "\u2A35" }, + "⥰": { "codepoints": [10608], "characters": "\u2970" }, + ")": { "codepoints": [41], "characters": "\u0029" }, + "⦔": { "codepoints": [10644], "characters": "\u2994" }, + "⨒": { "codepoints": [10770], "characters": "\u2A12" }, + "⇉": { "codepoints": [8649], "characters": "\u21C9" }, + "⇛": { "codepoints": [8667], "characters": "\u21DB" }, + "›": { "codepoints": [8250], "characters": "\u203A" }, + "𝓇": { "codepoints": [120007], "characters": "\uD835\uDCC7" }, + "ℛ": { "codepoints": [8475], "characters": "\u211B" }, + "↱": { "codepoints": [8625], "characters": "\u21B1" }, + "↱": { "codepoints": [8625], "characters": "\u21B1" }, + "]": { "codepoints": [93], "characters": "\u005D" }, + "’": { "codepoints": [8217], "characters": "\u2019" }, + "’": { "codepoints": [8217], "characters": "\u2019" }, + "⋌": { "codepoints": [8908], "characters": "\u22CC" }, + "⋊": { "codepoints": [8906], "characters": "\u22CA" }, + "▹": { "codepoints": [9657], "characters": "\u25B9" }, + "⊵": { "codepoints": [8885], "characters": "\u22B5" }, + "▸": { "codepoints": [9656], "characters": "\u25B8" }, + "⧎": { "codepoints": [10702], "characters": "\u29CE" }, + "⧴": { "codepoints": [10740], "characters": "\u29F4" 
}, + "⥨": { "codepoints": [10600], "characters": "\u2968" }, + "℞": { "codepoints": [8478], "characters": "\u211E" }, + "Ś": { "codepoints": [346], "characters": "\u015A" }, + "ś": { "codepoints": [347], "characters": "\u015B" }, + "‚": { "codepoints": [8218], "characters": "\u201A" }, + "⪸": { "codepoints": [10936], "characters": "\u2AB8" }, + "Š": { "codepoints": [352], "characters": "\u0160" }, + "š": { "codepoints": [353], "characters": "\u0161" }, + "⪼": { "codepoints": [10940], "characters": "\u2ABC" }, + "≻": { "codepoints": [8827], "characters": "\u227B" }, + "≽": { "codepoints": [8829], "characters": "\u227D" }, + "⪰": { "codepoints": [10928], "characters": "\u2AB0" }, + "⪴": { "codepoints": [10932], "characters": "\u2AB4" }, + "Ş": { "codepoints": [350], "characters": "\u015E" }, + "ş": { "codepoints": [351], "characters": "\u015F" }, + "Ŝ": { "codepoints": [348], "characters": "\u015C" }, + "ŝ": { "codepoints": [349], "characters": "\u015D" }, + "⪺": { "codepoints": [10938], "characters": "\u2ABA" }, + "⪶": { "codepoints": [10934], "characters": "\u2AB6" }, + "⋩": { "codepoints": [8937], "characters": "\u22E9" }, + "⨓": { "codepoints": [10771], "characters": "\u2A13" }, + "≿": { "codepoints": [8831], "characters": "\u227F" }, + "С": { "codepoints": [1057], "characters": "\u0421" }, + "с": { "codepoints": [1089], "characters": "\u0441" }, + "⊡": { "codepoints": [8865], "characters": "\u22A1" }, + "⋅": { "codepoints": [8901], "characters": "\u22C5" }, + "⩦": { "codepoints": [10854], "characters": "\u2A66" }, + "⤥": { "codepoints": [10533], "characters": "\u2925" }, + "↘": { "codepoints": [8600], "characters": "\u2198" }, + "⇘": { "codepoints": [8664], "characters": "\u21D8" }, + "↘": { "codepoints": [8600], "characters": "\u2198" }, + "§": { "codepoints": [167], "characters": "\u00A7" }, + "§": { "codepoints": [167], "characters": "\u00A7" }, + ";": { "codepoints": [59], "characters": "\u003B" }, + "⤩": { "codepoints": [10537], "characters": "\u2929" }, + "∖": { "codepoints": [8726], "characters": "\u2216" }, + "∖": { "codepoints": [8726], "characters": "\u2216" }, + "✶": { "codepoints": [10038], "characters": "\u2736" }, + "𝔖": { "codepoints": [120086], "characters": "\uD835\uDD16" }, + "𝔰": { "codepoints": [120112], "characters": "\uD835\uDD30" }, + "⌢": { "codepoints": [8994], "characters": "\u2322" }, + "♯": { "codepoints": [9839], "characters": "\u266F" }, + "Щ": { "codepoints": [1065], "characters": "\u0429" }, + "щ": { "codepoints": [1097], "characters": "\u0449" }, + "Ш": { "codepoints": [1064], "characters": "\u0428" }, + "ш": { "codepoints": [1096], "characters": "\u0448" }, + "↓": { "codepoints": [8595], "characters": "\u2193" }, + "←": { "codepoints": [8592], "characters": "\u2190" }, + "∣": { "codepoints": [8739], "characters": "\u2223" }, + "∥": { "codepoints": [8741], "characters": "\u2225" }, + "→": { "codepoints": [8594], "characters": "\u2192" }, + "↑": { "codepoints": [8593], "characters": "\u2191" }, + "­": { "codepoints": [173], "characters": "\u00AD" }, + "­": { "codepoints": [173], "characters": "\u00AD" }, + "Σ": { "codepoints": [931], "characters": "\u03A3" }, + "σ": { "codepoints": [963], "characters": "\u03C3" }, + "ς": { "codepoints": [962], "characters": "\u03C2" }, + "ς": { "codepoints": [962], "characters": "\u03C2" }, + "∼": { "codepoints": [8764], "characters": "\u223C" }, + "⩪": { "codepoints": [10858], "characters": "\u2A6A" }, + "≃": { "codepoints": [8771], "characters": "\u2243" }, + "≃": { "codepoints": [8771], "characters": "\u2243" }, + "⪞": 
{ "codepoints": [10910], "characters": "\u2A9E" }, + "⪠": { "codepoints": [10912], "characters": "\u2AA0" }, + "⪝": { "codepoints": [10909], "characters": "\u2A9D" }, + "⪟": { "codepoints": [10911], "characters": "\u2A9F" }, + "≆": { "codepoints": [8774], "characters": "\u2246" }, + "⨤": { "codepoints": [10788], "characters": "\u2A24" }, + "⥲": { "codepoints": [10610], "characters": "\u2972" }, + "←": { "codepoints": [8592], "characters": "\u2190" }, + "∘": { "codepoints": [8728], "characters": "\u2218" }, + "∖": { "codepoints": [8726], "characters": "\u2216" }, + "⨳": { "codepoints": [10803], "characters": "\u2A33" }, + "⧤": { "codepoints": [10724], "characters": "\u29E4" }, + "∣": { "codepoints": [8739], "characters": "\u2223" }, + "⌣": { "codepoints": [8995], "characters": "\u2323" }, + "⪪": { "codepoints": [10922], "characters": "\u2AAA" }, + "⪬": { "codepoints": [10924], "characters": "\u2AAC" }, + "⪬︀": { "codepoints": [10924, 65024], "characters": "\u2AAC\uFE00" }, + "Ь": { "codepoints": [1068], "characters": "\u042C" }, + "ь": { "codepoints": [1100], "characters": "\u044C" }, + "⌿": { "codepoints": [9023], "characters": "\u233F" }, + "⧄": { "codepoints": [10692], "characters": "\u29C4" }, + "/": { "codepoints": [47], "characters": "\u002F" }, + "𝕊": { "codepoints": [120138], "characters": "\uD835\uDD4A" }, + "𝕤": { "codepoints": [120164], "characters": "\uD835\uDD64" }, + "♠": { "codepoints": [9824], "characters": "\u2660" }, + "♠": { "codepoints": [9824], "characters": "\u2660" }, + "∥": { "codepoints": [8741], "characters": "\u2225" }, + "⊓": { "codepoints": [8851], "characters": "\u2293" }, + "⊓︀": { "codepoints": [8851, 65024], "characters": "\u2293\uFE00" }, + "⊔": { "codepoints": [8852], "characters": "\u2294" }, + "⊔︀": { "codepoints": [8852, 65024], "characters": "\u2294\uFE00" }, + "√": { "codepoints": [8730], "characters": "\u221A" }, + "⊏": { "codepoints": [8847], "characters": "\u228F" }, + "⊑": { "codepoints": [8849], "characters": "\u2291" }, + "⊏": { "codepoints": [8847], "characters": "\u228F" }, + "⊑": { "codepoints": [8849], "characters": "\u2291" }, + "⊐": { "codepoints": [8848], "characters": "\u2290" }, + "⊒": { "codepoints": [8850], "characters": "\u2292" }, + "⊐": { "codepoints": [8848], "characters": "\u2290" }, + "⊒": { "codepoints": [8850], "characters": "\u2292" }, + "□": { "codepoints": [9633], "characters": "\u25A1" }, + "□": { "codepoints": [9633], "characters": "\u25A1" }, + "⊓": { "codepoints": [8851], "characters": "\u2293" }, + "⊏": { "codepoints": [8847], "characters": "\u228F" }, + "⊑": { "codepoints": [8849], "characters": "\u2291" }, + "⊐": { "codepoints": [8848], "characters": "\u2290" }, + "⊒": { "codepoints": [8850], "characters": "\u2292" }, + "⊔": { "codepoints": [8852], "characters": "\u2294" }, + "▪": { "codepoints": [9642], "characters": "\u25AA" }, + "□": { "codepoints": [9633], "characters": "\u25A1" }, + "▪": { "codepoints": [9642], "characters": "\u25AA" }, + "→": { "codepoints": [8594], "characters": "\u2192" }, + "𝒮": { "codepoints": [119982], "characters": "\uD835\uDCAE" }, + "𝓈": { "codepoints": [120008], "characters": "\uD835\uDCC8" }, + "∖": { "codepoints": [8726], "characters": "\u2216" }, + "⌣": { "codepoints": [8995], "characters": "\u2323" }, + "⋆": { "codepoints": [8902], "characters": "\u22C6" }, + "⋆": { "codepoints": [8902], "characters": "\u22C6" }, + "☆": { "codepoints": [9734], "characters": "\u2606" }, + "★": { "codepoints": [9733], "characters": "\u2605" }, + "ϵ": { "codepoints": [1013], "characters": "\u03F5" }, 
+ "ϕ": { "codepoints": [981], "characters": "\u03D5" }, + "¯": { "codepoints": [175], "characters": "\u00AF" }, + "⊂": { "codepoints": [8834], "characters": "\u2282" }, + "⋐": { "codepoints": [8912], "characters": "\u22D0" }, + "⪽": { "codepoints": [10941], "characters": "\u2ABD" }, + "⫅": { "codepoints": [10949], "characters": "\u2AC5" }, + "⊆": { "codepoints": [8838], "characters": "\u2286" }, + "⫃": { "codepoints": [10947], "characters": "\u2AC3" }, + "⫁": { "codepoints": [10945], "characters": "\u2AC1" }, + "⫋": { "codepoints": [10955], "characters": "\u2ACB" }, + "⊊": { "codepoints": [8842], "characters": "\u228A" }, + "⪿": { "codepoints": [10943], "characters": "\u2ABF" }, + "⥹": { "codepoints": [10617], "characters": "\u2979" }, + "⊂": { "codepoints": [8834], "characters": "\u2282" }, + "⋐": { "codepoints": [8912], "characters": "\u22D0" }, + "⊆": { "codepoints": [8838], "characters": "\u2286" }, + "⫅": { "codepoints": [10949], "characters": "\u2AC5" }, + "⊆": { "codepoints": [8838], "characters": "\u2286" }, + "⊊": { "codepoints": [8842], "characters": "\u228A" }, + "⫋": { "codepoints": [10955], "characters": "\u2ACB" }, + "⫇": { "codepoints": [10951], "characters": "\u2AC7" }, + "⫕": { "codepoints": [10965], "characters": "\u2AD5" }, + "⫓": { "codepoints": [10963], "characters": "\u2AD3" }, + "⪸": { "codepoints": [10936], "characters": "\u2AB8" }, + "≻": { "codepoints": [8827], "characters": "\u227B" }, + "≽": { "codepoints": [8829], "characters": "\u227D" }, + "≻": { "codepoints": [8827], "characters": "\u227B" }, + "⪰": { "codepoints": [10928], "characters": "\u2AB0" }, + "≽": { "codepoints": [8829], "characters": "\u227D" }, + "≿": { "codepoints": [8831], "characters": "\u227F" }, + "⪰": { "codepoints": [10928], "characters": "\u2AB0" }, + "⪺": { "codepoints": [10938], "characters": "\u2ABA" }, + "⪶": { "codepoints": [10934], "characters": "\u2AB6" }, + "⋩": { "codepoints": [8937], "characters": "\u22E9" }, + "≿": { "codepoints": [8831], "characters": "\u227F" }, + "∋": { "codepoints": [8715], "characters": "\u220B" }, + "∑": { "codepoints": [8721], "characters": "\u2211" }, + "∑": { "codepoints": [8721], "characters": "\u2211" }, + "♪": { "codepoints": [9834], "characters": "\u266A" }, + "¹": { "codepoints": [185], "characters": "\u00B9" }, + "¹": { "codepoints": [185], "characters": "\u00B9" }, + "²": { "codepoints": [178], "characters": "\u00B2" }, + "²": { "codepoints": [178], "characters": "\u00B2" }, + "³": { "codepoints": [179], "characters": "\u00B3" }, + "³": { "codepoints": [179], "characters": "\u00B3" }, + "⊃": { "codepoints": [8835], "characters": "\u2283" }, + "⋑": { "codepoints": [8913], "characters": "\u22D1" }, + "⪾": { "codepoints": [10942], "characters": "\u2ABE" }, + "⫘": { "codepoints": [10968], "characters": "\u2AD8" }, + "⫆": { "codepoints": [10950], "characters": "\u2AC6" }, + "⊇": { "codepoints": [8839], "characters": "\u2287" }, + "⫄": { "codepoints": [10948], "characters": "\u2AC4" }, + "⊃": { "codepoints": [8835], "characters": "\u2283" }, + "⊇": { "codepoints": [8839], "characters": "\u2287" }, + "⟉": { "codepoints": [10185], "characters": "\u27C9" }, + "⫗": { "codepoints": [10967], "characters": "\u2AD7" }, + "⥻": { "codepoints": [10619], "characters": "\u297B" }, + "⫂": { "codepoints": [10946], "characters": "\u2AC2" }, + "⫌": { "codepoints": [10956], "characters": "\u2ACC" }, + "⊋": { "codepoints": [8843], "characters": "\u228B" }, + "⫀": { "codepoints": [10944], "characters": "\u2AC0" }, + "⊃": { "codepoints": [8835], "characters": "\u2283" }, + 
"⋑": { "codepoints": [8913], "characters": "\u22D1" }, + "⊇": { "codepoints": [8839], "characters": "\u2287" }, + "⫆": { "codepoints": [10950], "characters": "\u2AC6" }, + "⊋": { "codepoints": [8843], "characters": "\u228B" }, + "⫌": { "codepoints": [10956], "characters": "\u2ACC" }, + "⫈": { "codepoints": [10952], "characters": "\u2AC8" }, + "⫔": { "codepoints": [10964], "characters": "\u2AD4" }, + "⫖": { "codepoints": [10966], "characters": "\u2AD6" }, + "⤦": { "codepoints": [10534], "characters": "\u2926" }, + "↙": { "codepoints": [8601], "characters": "\u2199" }, + "⇙": { "codepoints": [8665], "characters": "\u21D9" }, + "↙": { "codepoints": [8601], "characters": "\u2199" }, + "⤪": { "codepoints": [10538], "characters": "\u292A" }, + "ß": { "codepoints": [223], "characters": "\u00DF" }, + "ß": { "codepoints": [223], "characters": "\u00DF" }, + " ": { "codepoints": [9], "characters": "\u0009" }, + "⌖": { "codepoints": [8982], "characters": "\u2316" }, + "Τ": { "codepoints": [932], "characters": "\u03A4" }, + "τ": { "codepoints": [964], "characters": "\u03C4" }, + "⎴": { "codepoints": [9140], "characters": "\u23B4" }, + "Ť": { "codepoints": [356], "characters": "\u0164" }, + "ť": { "codepoints": [357], "characters": "\u0165" }, + "Ţ": { "codepoints": [354], "characters": "\u0162" }, + "ţ": { "codepoints": [355], "characters": "\u0163" }, + "Т": { "codepoints": [1058], "characters": "\u0422" }, + "т": { "codepoints": [1090], "characters": "\u0442" }, + "⃛": { "codepoints": [8411], "characters": "\u20DB" }, + "⌕": { "codepoints": [8981], "characters": "\u2315" }, + "𝔗": { "codepoints": [120087], "characters": "\uD835\uDD17" }, + "𝔱": { "codepoints": [120113], "characters": "\uD835\uDD31" }, + "∴": { "codepoints": [8756], "characters": "\u2234" }, + "∴": { "codepoints": [8756], "characters": "\u2234" }, + "∴": { "codepoints": [8756], "characters": "\u2234" }, + "Θ": { "codepoints": [920], "characters": "\u0398" }, + "θ": { "codepoints": [952], "characters": "\u03B8" }, + "ϑ": { "codepoints": [977], "characters": "\u03D1" }, + "ϑ": { "codepoints": [977], "characters": "\u03D1" }, + "≈": { "codepoints": [8776], "characters": "\u2248" }, + "∼": { "codepoints": [8764], "characters": "\u223C" }, + "  ": { "codepoints": [8287, 8202], "characters": "\u205F\u200A" }, + " ": { "codepoints": [8201], "characters": "\u2009" }, + " ": { "codepoints": [8201], "characters": "\u2009" }, + "≈": { "codepoints": [8776], "characters": "\u2248" }, + "∼": { "codepoints": [8764], "characters": "\u223C" }, + "Þ": { "codepoints": [222], "characters": "\u00DE" }, + "Þ": { "codepoints": [222], "characters": "\u00DE" }, + "þ": { "codepoints": [254], "characters": "\u00FE" }, + "þ": { "codepoints": [254], "characters": "\u00FE" }, + "˜": { "codepoints": [732], "characters": "\u02DC" }, + "∼": { "codepoints": [8764], "characters": "\u223C" }, + "≃": { "codepoints": [8771], "characters": "\u2243" }, + "≅": { "codepoints": [8773], "characters": "\u2245" }, + "≈": { "codepoints": [8776], "characters": "\u2248" }, + "⨱": { "codepoints": [10801], "characters": "\u2A31" }, + "⊠": { "codepoints": [8864], "characters": "\u22A0" }, + "×": { "codepoints": [215], "characters": "\u00D7" }, + "×": { "codepoints": [215], "characters": "\u00D7" }, + "⨰": { "codepoints": [10800], "characters": "\u2A30" }, + "∭": { "codepoints": [8749], "characters": "\u222D" }, + "⤨": { "codepoints": [10536], "characters": "\u2928" }, + "⌶": { "codepoints": [9014], "characters": "\u2336" }, + "⫱": { "codepoints": [10993], "characters": "\u2AF1" }, + 
"⊤": { "codepoints": [8868], "characters": "\u22A4" }, + "𝕋": { "codepoints": [120139], "characters": "\uD835\uDD4B" }, + "𝕥": { "codepoints": [120165], "characters": "\uD835\uDD65" }, + "⫚": { "codepoints": [10970], "characters": "\u2ADA" }, + "⤩": { "codepoints": [10537], "characters": "\u2929" }, + "‴": { "codepoints": [8244], "characters": "\u2034" }, + "™": { "codepoints": [8482], "characters": "\u2122" }, + "™": { "codepoints": [8482], "characters": "\u2122" }, + "▵": { "codepoints": [9653], "characters": "\u25B5" }, + "▿": { "codepoints": [9663], "characters": "\u25BF" }, + "◃": { "codepoints": [9667], "characters": "\u25C3" }, + "⊴": { "codepoints": [8884], "characters": "\u22B4" }, + "≜": { "codepoints": [8796], "characters": "\u225C" }, + "▹": { "codepoints": [9657], "characters": "\u25B9" }, + "⊵": { "codepoints": [8885], "characters": "\u22B5" }, + "◬": { "codepoints": [9708], "characters": "\u25EC" }, + "≜": { "codepoints": [8796], "characters": "\u225C" }, + "⨺": { "codepoints": [10810], "characters": "\u2A3A" }, + "⃛": { "codepoints": [8411], "characters": "\u20DB" }, + "⨹": { "codepoints": [10809], "characters": "\u2A39" }, + "⧍": { "codepoints": [10701], "characters": "\u29CD" }, + "⨻": { "codepoints": [10811], "characters": "\u2A3B" }, + "⏢": { "codepoints": [9186], "characters": "\u23E2" }, + "𝒯": { "codepoints": [119983], "characters": "\uD835\uDCAF" }, + "𝓉": { "codepoints": [120009], "characters": "\uD835\uDCC9" }, + "Ц": { "codepoints": [1062], "characters": "\u0426" }, + "ц": { "codepoints": [1094], "characters": "\u0446" }, + "Ћ": { "codepoints": [1035], "characters": "\u040B" }, + "ћ": { "codepoints": [1115], "characters": "\u045B" }, + "Ŧ": { "codepoints": [358], "characters": "\u0166" }, + "ŧ": { "codepoints": [359], "characters": "\u0167" }, + "≬": { "codepoints": [8812], "characters": "\u226C" }, + "↞": { "codepoints": [8606], "characters": "\u219E" }, + "↠": { "codepoints": [8608], "characters": "\u21A0" }, + "Ú": { "codepoints": [218], "characters": "\u00DA" }, + "Ú": { "codepoints": [218], "characters": "\u00DA" }, + "ú": { "codepoints": [250], "characters": "\u00FA" }, + "ú": { "codepoints": [250], "characters": "\u00FA" }, + "↑": { "codepoints": [8593], "characters": "\u2191" }, + "↟": { "codepoints": [8607], "characters": "\u219F" }, + "⇑": { "codepoints": [8657], "characters": "\u21D1" }, + "⥉": { "codepoints": [10569], "characters": "\u2949" }, + "Ў": { "codepoints": [1038], "characters": "\u040E" }, + "ў": { "codepoints": [1118], "characters": "\u045E" }, + "Ŭ": { "codepoints": [364], "characters": "\u016C" }, + "ŭ": { "codepoints": [365], "characters": "\u016D" }, + "Û": { "codepoints": [219], "characters": "\u00DB" }, + "Û": { "codepoints": [219], "characters": "\u00DB" }, + "û": { "codepoints": [251], "characters": "\u00FB" }, + "û": { "codepoints": [251], "characters": "\u00FB" }, + "У": { "codepoints": [1059], "characters": "\u0423" }, + "у": { "codepoints": [1091], "characters": "\u0443" }, + "⇅": { "codepoints": [8645], "characters": "\u21C5" }, + "Ű": { "codepoints": [368], "characters": "\u0170" }, + "ű": { "codepoints": [369], "characters": "\u0171" }, + "⥮": { "codepoints": [10606], "characters": "\u296E" }, + "⥾": { "codepoints": [10622], "characters": "\u297E" }, + "𝔘": { "codepoints": [120088], "characters": "\uD835\uDD18" }, + "𝔲": { "codepoints": [120114], "characters": "\uD835\uDD32" }, + "Ù": { "codepoints": [217], "characters": "\u00D9" }, + "Ù": { "codepoints": [217], "characters": "\u00D9" }, + "ù": { "codepoints": [249], 
"characters": "\u00F9" }, + "ù": { "codepoints": [249], "characters": "\u00F9" }, + "⥣": { "codepoints": [10595], "characters": "\u2963" }, + "↿": { "codepoints": [8639], "characters": "\u21BF" }, + "↾": { "codepoints": [8638], "characters": "\u21BE" }, + "▀": { "codepoints": [9600], "characters": "\u2580" }, + "⌜": { "codepoints": [8988], "characters": "\u231C" }, + "⌜": { "codepoints": [8988], "characters": "\u231C" }, + "⌏": { "codepoints": [8975], "characters": "\u230F" }, + "◸": { "codepoints": [9720], "characters": "\u25F8" }, + "Ū": { "codepoints": [362], "characters": "\u016A" }, + "ū": { "codepoints": [363], "characters": "\u016B" }, + "¨": { "codepoints": [168], "characters": "\u00A8" }, + "¨": { "codepoints": [168], "characters": "\u00A8" }, + "_": { "codepoints": [95], "characters": "\u005F" }, + "⏟": { "codepoints": [9183], "characters": "\u23DF" }, + "⎵": { "codepoints": [9141], "characters": "\u23B5" }, + "⏝": { "codepoints": [9181], "characters": "\u23DD" }, + "⋃": { "codepoints": [8899], "characters": "\u22C3" }, + "⊎": { "codepoints": [8846], "characters": "\u228E" }, + "Ų": { "codepoints": [370], "characters": "\u0172" }, + "ų": { "codepoints": [371], "characters": "\u0173" }, + "𝕌": { "codepoints": [120140], "characters": "\uD835\uDD4C" }, + "𝕦": { "codepoints": [120166], "characters": "\uD835\uDD66" }, + "⤒": { "codepoints": [10514], "characters": "\u2912" }, + "↑": { "codepoints": [8593], "characters": "\u2191" }, + "↑": { "codepoints": [8593], "characters": "\u2191" }, + "⇑": { "codepoints": [8657], "characters": "\u21D1" }, + "⇅": { "codepoints": [8645], "characters": "\u21C5" }, + "↕": { "codepoints": [8597], "characters": "\u2195" }, + "↕": { "codepoints": [8597], "characters": "\u2195" }, + "⇕": { "codepoints": [8661], "characters": "\u21D5" }, + "⥮": { "codepoints": [10606], "characters": "\u296E" }, + "↿": { "codepoints": [8639], "characters": "\u21BF" }, + "↾": { "codepoints": [8638], "characters": "\u21BE" }, + "⊎": { "codepoints": [8846], "characters": "\u228E" }, + "↖": { "codepoints": [8598], "characters": "\u2196" }, + "↗": { "codepoints": [8599], "characters": "\u2197" }, + "υ": { "codepoints": [965], "characters": "\u03C5" }, + "ϒ": { "codepoints": [978], "characters": "\u03D2" }, + "ϒ": { "codepoints": [978], "characters": "\u03D2" }, + "Υ": { "codepoints": [933], "characters": "\u03A5" }, + "υ": { "codepoints": [965], "characters": "\u03C5" }, + "↥": { "codepoints": [8613], "characters": "\u21A5" }, + "⊥": { "codepoints": [8869], "characters": "\u22A5" }, + "⇈": { "codepoints": [8648], "characters": "\u21C8" }, + "⌝": { "codepoints": [8989], "characters": "\u231D" }, + "⌝": { "codepoints": [8989], "characters": "\u231D" }, + "⌎": { "codepoints": [8974], "characters": "\u230E" }, + "Ů": { "codepoints": [366], "characters": "\u016E" }, + "ů": { "codepoints": [367], "characters": "\u016F" }, + "◹": { "codepoints": [9721], "characters": "\u25F9" }, + "𝒰": { "codepoints": [119984], "characters": "\uD835\uDCB0" }, + "𝓊": { "codepoints": [120010], "characters": "\uD835\uDCCA" }, + "⋰": { "codepoints": [8944], "characters": "\u22F0" }, + "Ũ": { "codepoints": [360], "characters": "\u0168" }, + "ũ": { "codepoints": [361], "characters": "\u0169" }, + "▵": { "codepoints": [9653], "characters": "\u25B5" }, + "▴": { "codepoints": [9652], "characters": "\u25B4" }, + "⇈": { "codepoints": [8648], "characters": "\u21C8" }, + "Ü": { "codepoints": [220], "characters": "\u00DC" }, + "Ü": { "codepoints": [220], "characters": "\u00DC" }, + "ü": { "codepoints": [252], 
"characters": "\u00FC" }, + "ü": { "codepoints": [252], "characters": "\u00FC" }, + "⦧": { "codepoints": [10663], "characters": "\u29A7" }, + "⦜": { "codepoints": [10652], "characters": "\u299C" }, + "ϵ": { "codepoints": [1013], "characters": "\u03F5" }, + "ϰ": { "codepoints": [1008], "characters": "\u03F0" }, + "∅": { "codepoints": [8709], "characters": "\u2205" }, + "ϕ": { "codepoints": [981], "characters": "\u03D5" }, + "ϖ": { "codepoints": [982], "characters": "\u03D6" }, + "∝": { "codepoints": [8733], "characters": "\u221D" }, + "↕": { "codepoints": [8597], "characters": "\u2195" }, + "⇕": { "codepoints": [8661], "characters": "\u21D5" }, + "ϱ": { "codepoints": [1009], "characters": "\u03F1" }, + "ς": { "codepoints": [962], "characters": "\u03C2" }, + "⊊︀": { "codepoints": [8842, 65024], "characters": "\u228A\uFE00" }, + "⫋︀": { "codepoints": [10955, 65024], "characters": "\u2ACB\uFE00" }, + "⊋︀": { "codepoints": [8843, 65024], "characters": "\u228B\uFE00" }, + "⫌︀": { "codepoints": [10956, 65024], "characters": "\u2ACC\uFE00" }, + "ϑ": { "codepoints": [977], "characters": "\u03D1" }, + "⊲": { "codepoints": [8882], "characters": "\u22B2" }, + "⊳": { "codepoints": [8883], "characters": "\u22B3" }, + "⫨": { "codepoints": [10984], "characters": "\u2AE8" }, + "⫫": { "codepoints": [10987], "characters": "\u2AEB" }, + "⫩": { "codepoints": [10985], "characters": "\u2AE9" }, + "В": { "codepoints": [1042], "characters": "\u0412" }, + "в": { "codepoints": [1074], "characters": "\u0432" }, + "⊢": { "codepoints": [8866], "characters": "\u22A2" }, + "⊨": { "codepoints": [8872], "characters": "\u22A8" }, + "⊩": { "codepoints": [8873], "characters": "\u22A9" }, + "⊫": { "codepoints": [8875], "characters": "\u22AB" }, + "⫦": { "codepoints": [10982], "characters": "\u2AE6" }, + "⊻": { "codepoints": [8891], "characters": "\u22BB" }, + "∨": { "codepoints": [8744], "characters": "\u2228" }, + "⋁": { "codepoints": [8897], "characters": "\u22C1" }, + "≚": { "codepoints": [8794], "characters": "\u225A" }, + "⋮": { "codepoints": [8942], "characters": "\u22EE" }, + "|": { "codepoints": [124], "characters": "\u007C" }, + "‖": { "codepoints": [8214], "characters": "\u2016" }, + "|": { "codepoints": [124], "characters": "\u007C" }, + "‖": { "codepoints": [8214], "characters": "\u2016" }, + "∣": { "codepoints": [8739], "characters": "\u2223" }, + "|": { "codepoints": [124], "characters": "\u007C" }, + "❘": { "codepoints": [10072], "characters": "\u2758" }, + "≀": { "codepoints": [8768], "characters": "\u2240" }, + " ": { "codepoints": [8202], "characters": "\u200A" }, + "𝔙": { "codepoints": [120089], "characters": "\uD835\uDD19" }, + "𝔳": { "codepoints": [120115], "characters": "\uD835\uDD33" }, + "⊲": { "codepoints": [8882], "characters": "\u22B2" }, + "⊂⃒": { "codepoints": [8834, 8402], "characters": "\u2282\u20D2" }, + "⊃⃒": { "codepoints": [8835, 8402], "characters": "\u2283\u20D2" }, + "𝕍": { "codepoints": [120141], "characters": "\uD835\uDD4D" }, + "𝕧": { "codepoints": [120167], "characters": "\uD835\uDD67" }, + "∝": { "codepoints": [8733], "characters": "\u221D" }, + "⊳": { "codepoints": [8883], "characters": "\u22B3" }, + "𝒱": { "codepoints": [119985], "characters": "\uD835\uDCB1" }, + "𝓋": { "codepoints": [120011], "characters": "\uD835\uDCCB" }, + "⫋︀": { "codepoints": [10955, 65024], "characters": "\u2ACB\uFE00" }, + "⊊︀": { "codepoints": [8842, 65024], "characters": "\u228A\uFE00" }, + "⫌︀": { "codepoints": [10956, 65024], "characters": "\u2ACC\uFE00" }, + "⊋︀": { "codepoints": [8843, 65024], 
"characters": "\u228B\uFE00" }, + "⊪": { "codepoints": [8874], "characters": "\u22AA" }, + "⦚": { "codepoints": [10650], "characters": "\u299A" }, + "Ŵ": { "codepoints": [372], "characters": "\u0174" }, + "ŵ": { "codepoints": [373], "characters": "\u0175" }, + "⩟": { "codepoints": [10847], "characters": "\u2A5F" }, + "∧": { "codepoints": [8743], "characters": "\u2227" }, + "⋀": { "codepoints": [8896], "characters": "\u22C0" }, + "≙": { "codepoints": [8793], "characters": "\u2259" }, + "℘": { "codepoints": [8472], "characters": "\u2118" }, + "𝔚": { "codepoints": [120090], "characters": "\uD835\uDD1A" }, + "𝔴": { "codepoints": [120116], "characters": "\uD835\uDD34" }, + "𝕎": { "codepoints": [120142], "characters": "\uD835\uDD4E" }, + "𝕨": { "codepoints": [120168], "characters": "\uD835\uDD68" }, + "℘": { "codepoints": [8472], "characters": "\u2118" }, + "≀": { "codepoints": [8768], "characters": "\u2240" }, + "≀": { "codepoints": [8768], "characters": "\u2240" }, + "𝒲": { "codepoints": [119986], "characters": "\uD835\uDCB2" }, + "𝓌": { "codepoints": [120012], "characters": "\uD835\uDCCC" }, + "⋂": { "codepoints": [8898], "characters": "\u22C2" }, + "◯": { "codepoints": [9711], "characters": "\u25EF" }, + "⋃": { "codepoints": [8899], "characters": "\u22C3" }, + "▽": { "codepoints": [9661], "characters": "\u25BD" }, + "𝔛": { "codepoints": [120091], "characters": "\uD835\uDD1B" }, + "𝔵": { "codepoints": [120117], "characters": "\uD835\uDD35" }, + "⟷": { "codepoints": [10231], "characters": "\u27F7" }, + "⟺": { "codepoints": [10234], "characters": "\u27FA" }, + "Ξ": { "codepoints": [926], "characters": "\u039E" }, + "ξ": { "codepoints": [958], "characters": "\u03BE" }, + "⟵": { "codepoints": [10229], "characters": "\u27F5" }, + "⟸": { "codepoints": [10232], "characters": "\u27F8" }, + "⟼": { "codepoints": [10236], "characters": "\u27FC" }, + "⋻": { "codepoints": [8955], "characters": "\u22FB" }, + "⨀": { "codepoints": [10752], "characters": "\u2A00" }, + "𝕏": { "codepoints": [120143], "characters": "\uD835\uDD4F" }, + "𝕩": { "codepoints": [120169], "characters": "\uD835\uDD69" }, + "⨁": { "codepoints": [10753], "characters": "\u2A01" }, + "⨂": { "codepoints": [10754], "characters": "\u2A02" }, + "⟶": { "codepoints": [10230], "characters": "\u27F6" }, + "⟹": { "codepoints": [10233], "characters": "\u27F9" }, + "𝒳": { "codepoints": [119987], "characters": "\uD835\uDCB3" }, + "𝓍": { "codepoints": [120013], "characters": "\uD835\uDCCD" }, + "⨆": { "codepoints": [10758], "characters": "\u2A06" }, + "⨄": { "codepoints": [10756], "characters": "\u2A04" }, + "△": { "codepoints": [9651], "characters": "\u25B3" }, + "⋁": { "codepoints": [8897], "characters": "\u22C1" }, + "⋀": { "codepoints": [8896], "characters": "\u22C0" }, + "Ý": { "codepoints": [221], "characters": "\u00DD" }, + "Ý": { "codepoints": [221], "characters": "\u00DD" }, + "ý": { "codepoints": [253], "characters": "\u00FD" }, + "ý": { "codepoints": [253], "characters": "\u00FD" }, + "Я": { "codepoints": [1071], "characters": "\u042F" }, + "я": { "codepoints": [1103], "characters": "\u044F" }, + "Ŷ": { "codepoints": [374], "characters": "\u0176" }, + "ŷ": { "codepoints": [375], "characters": "\u0177" }, + "Ы": { "codepoints": [1067], "characters": "\u042B" }, + "ы": { "codepoints": [1099], "characters": "\u044B" }, + "¥": { "codepoints": [165], "characters": "\u00A5" }, + "¥": { "codepoints": [165], "characters": "\u00A5" }, + "𝔜": { "codepoints": [120092], "characters": "\uD835\uDD1C" }, + "𝔶": { "codepoints": [120118], "characters": 
"\uD835\uDD36" }, + "Ї": { "codepoints": [1031], "characters": "\u0407" }, + "ї": { "codepoints": [1111], "characters": "\u0457" }, + "𝕐": { "codepoints": [120144], "characters": "\uD835\uDD50" }, + "𝕪": { "codepoints": [120170], "characters": "\uD835\uDD6A" }, + "𝒴": { "codepoints": [119988], "characters": "\uD835\uDCB4" }, + "𝓎": { "codepoints": [120014], "characters": "\uD835\uDCCE" }, + "Ю": { "codepoints": [1070], "characters": "\u042E" }, + "ю": { "codepoints": [1102], "characters": "\u044E" }, + "ÿ": { "codepoints": [255], "characters": "\u00FF" }, + "ÿ": { "codepoints": [255], "characters": "\u00FF" }, + "Ÿ": { "codepoints": [376], "characters": "\u0178" }, + "Ź": { "codepoints": [377], "characters": "\u0179" }, + "ź": { "codepoints": [378], "characters": "\u017A" }, + "Ž": { "codepoints": [381], "characters": "\u017D" }, + "ž": { "codepoints": [382], "characters": "\u017E" }, + "З": { "codepoints": [1047], "characters": "\u0417" }, + "з": { "codepoints": [1079], "characters": "\u0437" }, + "Ż": { "codepoints": [379], "characters": "\u017B" }, + "ż": { "codepoints": [380], "characters": "\u017C" }, + "ℨ": { "codepoints": [8488], "characters": "\u2128" }, + "​": { "codepoints": [8203], "characters": "\u200B" }, + "Ζ": { "codepoints": [918], "characters": "\u0396" }, + "ζ": { "codepoints": [950], "characters": "\u03B6" }, + "𝔷": { "codepoints": [120119], "characters": "\uD835\uDD37" }, + "ℨ": { "codepoints": [8488], "characters": "\u2128" }, + "Ж": { "codepoints": [1046], "characters": "\u0416" }, + "ж": { "codepoints": [1078], "characters": "\u0436" }, + "⇝": { "codepoints": [8669], "characters": "\u21DD" }, + "𝕫": { "codepoints": [120171], "characters": "\uD835\uDD6B" }, + "ℤ": { "codepoints": [8484], "characters": "\u2124" }, + "𝒵": { "codepoints": [119989], "characters": "\uD835\uDCB5" }, + "𝓏": { "codepoints": [120015], "characters": "\uD835\uDCCF" }, + "‍": { "codepoints": [8205], "characters": "\u200D" }, + "‌": { "codepoints": [8204], "characters": "\u200C" } +} diff --git a/priv/tokenizer_test_template.ex.eex b/priv/tokenizer_test_template.ex.eex new file mode 100644 index 00000000..1d8a2c12 --- /dev/null +++ b/priv/tokenizer_test_template.ex.eex @@ -0,0 +1,22 @@ +defmodule Floki.HTML.Generated.Tokenizer.<%= test_name %>Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests <%= test_file %>". + # html5lib-tests rev: <%= revision %> + + alias Floki.HTML.Tokenizer + +<%= for %{"input" => input, "output" => output, "description" => description} <- tests do %> + test <%= inspect("tokenize/1 " <> description) %> do + input = <%= inspect(input) %> + output = <%= inspect(output) %> + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +<% end %> +end diff --git a/src/floki_selector_lexer.xrl b/src/floki_selector_lexer.xrl index 1a63b7c2..3827c764 100644 --- a/src/floki_selector_lexer.xrl +++ b/src/floki_selector_lexer.xrl @@ -1,6 +1,6 @@ Definitions. -IDENTIFIER = [-A-Za-z0-9_]+ +IDENTIFIER = [-A-Za-z0-9_]+(\\\.[-A-Za-z0-9_]+)* QUOTED = (\"[^"]*\"|\'[^']*\') PARENTESIS = \([^)]*\) INT = [0-9]+ @@ -9,12 +9,14 @@ ODD = (o|O)(d|D)(d|D) EVEN = (e|E)(v|V)(e|E)(n|N) PSEUDO_PATT = (\+|-)?({INT})?(n|N)((\+|-){INT})? SYMBOL = [\[\]*] +ATTRIBUTE_IDENTIFIER = \s[is]\] W = [\s\t\r\n\f] Rules. {IDENTIFIER} : {token, {identifier, TokenLine, TokenChars}}. {QUOTED} : {token, {quoted, TokenLine, remove_wrapper(TokenChars)}}. 
+{ATTRIBUTE_IDENTIFIER} : {token, {attribute_identifier, TokenLine, TokenChars}}. {SYMBOL} : {token, {TokenChars, TokenLine}}. #{IDENTIFIER} : {token, {hash, TokenLine, tail(TokenChars)}}. \.{IDENTIFIER} : {token, {class, TokenLine, tail(TokenChars)}}. diff --git a/test/floki/html/generated/tokenizer/entities_test.exs b/test/floki/html/generated/tokenizer/entities_test.exs new file mode 100644 index 00000000..f03e7943 --- /dev/null +++ b/test/floki/html/generated/tokenizer/entities_test.exs @@ -0,0 +1,783 @@ +defmodule Floki.HTML.Generated.Tokenizer.EntitiesTest do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests entities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Ambiguous ampersand." do + input = "&rrrraannddom;" + output = [["Character", "&rrrraannddom;"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 CR as hexadecimal numeric entity" do + input = " " + output = [["Character", "\r"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 CR as numeric entity" do + input = " " + output = [["Character", "\r"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Decimal numeric entity followed by hex character A." do + input = "aA" + output = [["Character", "aA"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Decimal numeric entity followed by hex character a." do + input = "aa" + output = [["Character", "aa"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Decimal numeric entity followed by hex character f." do + input = "af" + output = [["Character", "af"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity name followed by the equals sign in an attribute value." do + input = "" + output = [["StartTag", "h", %{"a" => "&lang="}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Semicolonless named entity 'not' followed by 'i;' in body" do + input = "¬i;" + output = [["Character", "¬i;"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name." 
do + input = "" + output = [["StartTag", "h", %{"a" => "¬i;"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Very long undefined named entity in body" do + input = + "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;" + + output = [ + [ + "Character", + "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;" + ] + ] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 BULLET hexadecimal numeric entity." do + input = "•" + output = [["Character", "•"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 BULLET numeric entity." do + input = "•" + output = [["Character", "•"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 DAGGER hexadecimal numeric entity." do + input = "†" + output = [["Character", "†"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 DAGGER numeric entity." do + input = "†" + output = [["Character", "†"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 DOUBLE DAGGER hexadecimal numeric entity." 
do + input = "‡" + output = [["Character", "‡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 DOUBLE DAGGER numeric entity." do + input = "‡" + output = [["Character", "‡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity." do + input = "„" + output = [["Character", "„"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity." do + input = "„" + output = [["Character", "„"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 EM DASH hexadecimal numeric entity." do + input = "—" + output = [["Character", "—"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 EM DASH numeric entity." do + input = "—" + output = [["Character", "—"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 EN DASH hexadecimal numeric entity." do + input = "–" + output = [["Character", "–"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 EN DASH numeric entity." do + input = "–" + output = [["Character", "–"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 EURO SIGN hexadecimal numeric entity." do + input = "€" + output = [["Character", "€"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 EURO SIGN numeric entity." do + input = "€" + output = [["Character", "€"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity." do + input = "…" + output = [["Character", "…"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 HORIZONTAL ELLIPSIS numeric entity." do + input = "…" + output = [["Character", "…"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity." do + input = "Š" + output = [["Character", "Š"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity." 
do + input = "Š" + output = [["Character", "Š"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity." do + input = "Ÿ" + output = [["Character", "Ÿ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity." do + input = "Ž" + output = [["Character", "Ž"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity." do + input = "Ž" + output = [["Character", "Ž"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity." do + input = "Œ" + output = [["Character", "Œ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity." do + input = "Œ" + output = [["Character", "Œ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity." do + input = "ƒ" + output = [["Character", "ƒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity." do + input = "ƒ" + output = [["Character", "ƒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity." do + input = "š" + output = [["Character", "š"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity." do + input = "š" + output = [["Character", "š"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity." do + input = "ž" + output = [["Character", "ž"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity." do + input = "œ" + output = [["Character", "œ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LATIN SMALL LIGATURE OE numeric entity." do + input = "œ" + output = [["Character", "œ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity." 
do + input = "“" + output = [["Character", "“"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity." do + input = "“" + output = [["Character", "“"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity." do + input = "‘" + output = [["Character", "‘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity." do + input = "‘" + output = [["Character", "‘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity." do + input = "ˆ" + output = [["Character", "ˆ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity." do + input = "ˆ" + output = [["Character", "ˆ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 PER MILLE SIGN hexadecimal numeric entity." do + input = "‰" + output = [["Character", "‰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 PER MILLE SIGN numeric entity." do + input = "‰" + output = [["Character", "‰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity." do + input = "" + output = [["Character", <<194, 129>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 REPLACEMENT CHAR numeric entity." do + input = "" + output = [["Character", <<194, 129>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity." do + input = "”" + output = [["Character", "”"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity." do + input = "”" + output = [["Character", "”"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity." do + input = "’" + output = [["Character", "’"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity." 
do + input = "’" + output = [["Character", "’"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity." do + input = "‹" + output = [["Character", "‹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity." do + input = "‹" + output = [["Character", "‹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity." do + input = "‚" + output = [["Character", "‚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity." do + input = "‚" + output = [["Character", "‚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity." do + input = "›" + output = [["Character", "›"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity." do + input = "›" + output = [["Character", "›"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SMALL TILDE hexadecimal numeric entity." do + input = "˜" + output = [["Character", "˜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 SMALL TILDE numeric entity." do + input = "˜" + output = [["Character", "˜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 TRADE MARK SIGN hexadecimal numeric entity." do + input = "™" + output = [["Character", "™"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Windows-1252 TRADE MARK SIGN numeric entity." do + input = "™" + output = [["Character", "™"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part10_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part10_test.exs new file mode 100644 index 00000000..48618b2d --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part10_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart10Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: emsp14 without a semi-colon" do + input = "&emsp14" + output = [["Character", "&emsp14"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eng without a semi-colon" do + input = "&eng" + output = [["Character", "&eng"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ensp without a semi-colon" do + input = "&ensp" + output = [["Character", "&ensp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eogon without a semi-colon" do + input = "&eogon" + output = [["Character", "&eogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eopf without a semi-colon" do + input = "&eopf" + output = [["Character", "&eopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: epar without a semi-colon" do + input = "&epar" + output = [["Character", "&epar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eparsl without a semi-colon" do + input = "&eparsl" + output = [["Character", "&eparsl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eplus without a semi-colon" do + input = "&eplus" + output = [["Character", "&eplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: epsi without a semi-colon" do + input = "&epsi" + output = [["Character", "&epsi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: epsilon without a semi-colon" do + input = "&epsilon" + output = [["Character", "&epsilon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: epsiv without a semi-colon" do + input = "&epsiv" + output = [["Character", "&epsiv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eqcirc without a semi-colon" do + input = "&eqcirc" + output = [["Character", "&eqcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eqcolon without a semi-colon" do + input = "&eqcolon" + output = [["Character", "&eqcolon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eqsim without a 
semi-colon" do + input = "&eqsim" + output = [["Character", "&eqsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eqslantgtr without a semi-colon" do + input = "&eqslantgtr" + output = [["Character", "&eqslantgtr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eqslantless without a semi-colon" do + input = "&eqslantless" + output = [["Character", "&eqslantless"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: equals without a semi-colon" do + input = "&equals" + output = [["Character", "&equals"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: equest without a semi-colon" do + input = "&equest" + output = [["Character", "&equest"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: equiv without a semi-colon" do + input = "&equiv" + output = [["Character", "&equiv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: equivDD without a semi-colon" do + input = "&equivDD" + output = [["Character", "&equivDD"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eqvparsl without a semi-colon" do + input = "&eqvparsl" + output = [["Character", "&eqvparsl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: erDot without a semi-colon" do + input = "&erDot" + output = [["Character", "&erDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: erarr without a semi-colon" do + input = "&erarr" + output = [["Character", "&erarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: escr without a semi-colon" do + input = "&escr" + output = [["Character", "&escr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: esdot without a semi-colon" do + input = "&esdot" + output = [["Character", "&esdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: esim without a semi-colon" do + input = "&esim" + output = [["Character", "&esim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eta without a semi-colon" do + input = "&eta" + output = [["Character", "&eta"]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: euro without a semi-colon" do + input = "&euro" + output = [["Character", "&euro"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: excl without a semi-colon" do + input = "&excl" + output = [["Character", "&excl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: exist without a semi-colon" do + input = "&exist" + output = [["Character", "&exist"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: expectation without a semi-colon" do + input = "&expectation" + output = [["Character", "&expectation"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: exponentiale without a semi-colon" do + input = "&exponentiale" + output = [["Character", "&exponentiale"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fallingdotseq without a semi-colon" do + input = "&fallingdotseq" + output = [["Character", "&fallingdotseq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fcy without a semi-colon" do + input = "&fcy" + output = [["Character", "&fcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: female without a semi-colon" do + input = "&female" + output = [["Character", "&female"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ffilig without a semi-colon" do + input = "&ffilig" + output = [["Character", "&ffilig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fflig without a semi-colon" do + input = "&fflig" + output = [["Character", "&fflig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ffllig without a semi-colon" do + input = "&ffllig" + output = [["Character", "&ffllig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ffr without a semi-colon" do + input = "&ffr" + output = [["Character", "&ffr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: filig without a semi-colon" do + input = "&filig" + output = [["Character", "&filig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Bad named entity: fjlig without a semi-colon" do + input = "&fjlig" + output = [["Character", "&fjlig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: flat without a semi-colon" do + input = "&flat" + output = [["Character", "&flat"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fllig without a semi-colon" do + input = "&fllig" + output = [["Character", "&fllig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fltns without a semi-colon" do + input = "&fltns" + output = [["Character", "&fltns"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fnof without a semi-colon" do + input = "&fnof" + output = [["Character", "&fnof"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fopf without a semi-colon" do + input = "&fopf" + output = [["Character", "&fopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: forall without a semi-colon" do + input = "&forall" + output = [["Character", "&forall"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fork without a semi-colon" do + input = "&fork" + output = [["Character", "&fork"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: forkv without a semi-colon" do + input = "&forkv" + output = [["Character", "&forkv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fpartint without a semi-colon" do + input = "&fpartint" + output = [["Character", "&fpartint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac13 without a semi-colon" do + input = "&frac13" + output = [["Character", "&frac13"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac15 without a semi-colon" do + input = "&frac15" + output = [["Character", "&frac15"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac16 without a semi-colon" do + input = "&frac16" + output = [["Character", "&frac16"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac18 without a semi-colon" do + input = "&frac18" + output = [["Character", "&frac18"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac23 without a semi-colon" do + input = "&frac23" + output = [["Character", "&frac23"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac25 without a semi-colon" do + input = "&frac25" + output = [["Character", "&frac25"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac35 without a semi-colon" do + input = "&frac35" + output = [["Character", "&frac35"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac38 without a semi-colon" do + input = "&frac38" + output = [["Character", "&frac38"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac45 without a semi-colon" do + input = "&frac45" + output = [["Character", "&frac45"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac56 without a semi-colon" do + input = "&frac56" + output = [["Character", "&frac56"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac58 without a semi-colon" do + input = "&frac58" + output = [["Character", "&frac58"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frac78 without a semi-colon" do + input = "&frac78" + output = [["Character", "&frac78"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frasl without a semi-colon" do + input = "&frasl" + output = [["Character", "&frasl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: frown without a semi-colon" do + input = "&frown" + output = [["Character", "&frown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: fscr without a semi-colon" do + input = "&fscr" + output = [["Character", "&fscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gE without a semi-colon" do + input = "&gE" + output = [["Character", "&gE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gEl without a semi-colon" do + input = "&gEl" + output = [["Character", "&gEl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gacute 
without a semi-colon" do + input = "&gacute" + output = [["Character", "&gacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gamma without a semi-colon" do + input = "&gamma" + output = [["Character", "&gamma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gammad without a semi-colon" do + input = "&gammad" + output = [["Character", "&gammad"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gap without a semi-colon" do + input = "&gap" + output = [["Character", "&gap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gbreve without a semi-colon" do + input = "&gbreve" + output = [["Character", "&gbreve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gcirc without a semi-colon" do + input = "&gcirc" + output = [["Character", "&gcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gcy without a semi-colon" do + input = "&gcy" + output = [["Character", "&gcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gdot without a semi-colon" do + input = "&gdot" + output = [["Character", "&gdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ge without a semi-colon" do + input = "&ge" + output = [["Character", "&ge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gel without a semi-colon" do + input = "&gel" + output = [["Character", "&gel"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: geq without a semi-colon" do + input = "&geq" + output = [["Character", "&geq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: geqq without a semi-colon" do + input = "&geqq" + output = [["Character", "&geqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: geqslant without a semi-colon" do + input = "&geqslant" + output = [["Character", "&geqslant"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ges without a semi-colon" do + input = "&ges" + output = [["Character", "&ges"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Bad named entity: gescc without a semi-colon" do + input = "&gescc" + output = [["Character", "&gescc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gesdot without a semi-colon" do + input = "&gesdot" + output = [["Character", "&gesdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gesdoto without a semi-colon" do + input = "&gesdoto" + output = [["Character", "&gesdoto"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gesdotol without a semi-colon" do + input = "&gesdotol" + output = [["Character", "&gesdotol"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gesl without a semi-colon" do + input = "&gesl" + output = [["Character", "&gesl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gesles without a semi-colon" do + input = "&gesles" + output = [["Character", "&gesles"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gfr without a semi-colon" do + input = "&gfr" + output = [["Character", "&gfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gg without a semi-colon" do + input = "&gg" + output = [["Character", "&gg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ggg without a semi-colon" do + input = "&ggg" + output = [["Character", "&ggg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gimel without a semi-colon" do + input = "&gimel" + output = [["Character", "&gimel"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gjcy without a semi-colon" do + input = "&gjcy" + output = [["Character", "&gjcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gl without a semi-colon" do + input = "&gl" + output = [["Character", "&gl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: glE without a semi-colon" do + input = "&glE" + output = [["Character", "&glE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gla without a semi-colon" do + input = "&gla" + output = [["Character", "&gla"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: glj without a semi-colon" do + input = "&glj" + output = [["Character", "&glj"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gnE without a semi-colon" do + input = "&gnE" + output = [["Character", "&gnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gnap without a semi-colon" do + input = "&gnap" + output = [["Character", "&gnap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gnapprox without a semi-colon" do + input = "&gnapprox" + output = [["Character", "&gnapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gne without a semi-colon" do + input = "&gne" + output = [["Character", "&gne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part11_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part11_test.exs new file mode 100644 index 00000000..1c92abc0 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part11_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart11Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: gneq without a semi-colon" do + input = "&gneq" + output = [["Character", "&gneq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gneqq without a semi-colon" do + input = "&gneqq" + output = [["Character", "&gneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gnsim without a semi-colon" do + input = "&gnsim" + output = [["Character", "&gnsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gopf without a semi-colon" do + input = "&gopf" + output = [["Character", "&gopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: grave without a semi-colon" do + input = "&grave" + output = [["Character", "&grave"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gscr without a semi-colon" do + input = "&gscr" + output = [["Character", "&gscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gsim without a semi-colon" do + input = "&gsim" + output = [["Character", "&gsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gsime without a semi-colon" do + input = "&gsime" + output = [["Character", "&gsime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gsiml without a semi-colon" do + input = "&gsiml" + output = [["Character", "&gsiml"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gvertneqq without a semi-colon" do + input = "&gvertneqq" + output = [["Character", "&gvertneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: gvnE without a semi-colon" do + input = "&gvnE" + output = [["Character", "&gvnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hArr without a semi-colon" do + input = "&hArr" + output = [["Character", "&hArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hairsp without a semi-colon" do + input = "&hairsp" + output = [["Character", "&hairsp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: half without a semi-colon" do 
+ input = "&half" + output = [["Character", "&half"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hamilt without a semi-colon" do + input = "&hamilt" + output = [["Character", "&hamilt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hardcy without a semi-colon" do + input = "&hardcy" + output = [["Character", "&hardcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: harr without a semi-colon" do + input = "&harr" + output = [["Character", "&harr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: harrcir without a semi-colon" do + input = "&harrcir" + output = [["Character", "&harrcir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: harrw without a semi-colon" do + input = "&harrw" + output = [["Character", "&harrw"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hbar without a semi-colon" do + input = "&hbar" + output = [["Character", "&hbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hcirc without a semi-colon" do + input = "&hcirc" + output = [["Character", "&hcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hearts without a semi-colon" do + input = "&hearts" + output = [["Character", "&hearts"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: heartsuit without a semi-colon" do + input = "&heartsuit" + output = [["Character", "&heartsuit"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hellip without a semi-colon" do + input = "&hellip" + output = [["Character", "&hellip"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hercon without a semi-colon" do + input = "&hercon" + output = [["Character", "&hercon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hfr without a semi-colon" do + input = "&hfr" + output = [["Character", "&hfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hksearow without a semi-colon" do + input = "&hksearow" + output = [["Character", "&hksearow"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hkswarow without a semi-colon" do + input = "&hkswarow" + output = [["Character", "&hkswarow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hoarr without a semi-colon" do + input = "&hoarr" + output = [["Character", "&hoarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: homtht without a semi-colon" do + input = "&homtht" + output = [["Character", "&homtht"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hookleftarrow without a semi-colon" do + input = "&hookleftarrow" + output = [["Character", "&hookleftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hookrightarrow without a semi-colon" do + input = "&hookrightarrow" + output = [["Character", "&hookrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hopf without a semi-colon" do + input = "&hopf" + output = [["Character", "&hopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: horbar without a semi-colon" do + input = "&horbar" + output = [["Character", "&horbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hscr without a semi-colon" do + input = "&hscr" + output = [["Character", "&hscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hslash without a semi-colon" do + input = "&hslash" + output = [["Character", "&hslash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hstrok without a semi-colon" do + input = "&hstrok" + output = [["Character", "&hstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hybull without a semi-colon" do + input = "&hybull" + output = [["Character", "&hybull"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: hyphen without a semi-colon" do + input = "&hyphen" + output = [["Character", "&hyphen"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ic without a semi-colon" do + input = "&ic" + output = [["Character", "&ic"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Bad named entity: icy without a semi-colon" do + input = "&icy" + output = [["Character", "&icy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iecy without a semi-colon" do + input = "&iecy" + output = [["Character", "&iecy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iff without a semi-colon" do + input = "&iff" + output = [["Character", "&iff"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ifr without a semi-colon" do + input = "&ifr" + output = [["Character", "&ifr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ii without a semi-colon" do + input = "&ii" + output = [["Character", "&ii"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iiiint without a semi-colon" do + input = "&iiiint" + output = [["Character", "&iiiint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iiint without a semi-colon" do + input = "&iiint" + output = [["Character", "&iiint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iinfin without a semi-colon" do + input = "&iinfin" + output = [["Character", "&iinfin"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iiota without a semi-colon" do + input = "&iiota" + output = [["Character", "&iiota"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ijlig without a semi-colon" do + input = "&ijlig" + output = [["Character", "&ijlig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: imacr without a semi-colon" do + input = "&imacr" + output = [["Character", "&imacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: image without a semi-colon" do + input = "&image" + output = [["Character", "&image"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: imagline without a semi-colon" do + input = "&imagline" + output = [["Character", "&imagline"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: imagpart without a semi-colon" do + input = "&imagpart" + output = [["Character", "&imagpart"]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: imath without a semi-colon" do + input = "&imath" + output = [["Character", "&imath"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: imof without a semi-colon" do + input = "&imof" + output = [["Character", "&imof"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: imped without a semi-colon" do + input = "&imped" + output = [["Character", "&imped"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: in without a semi-colon" do + input = "&in" + output = [["Character", "&in"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: incare without a semi-colon" do + input = "&incare" + output = [["Character", "&incare"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: infin without a semi-colon" do + input = "&infin" + output = [["Character", "&infin"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: infintie without a semi-colon" do + input = "&infintie" + output = [["Character", "&infintie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: inodot without a semi-colon" do + input = "&inodot" + output = [["Character", "&inodot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: int without a semi-colon" do + input = "&int" + output = [["Character", "&int"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: intcal without a semi-colon" do + input = "&intcal" + output = [["Character", "&intcal"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: integers without a semi-colon" do + input = "&integers" + output = [["Character", "&integers"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: intercal without a semi-colon" do + input = "&intercal" + output = [["Character", "&intercal"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: intlarhk without a semi-colon" do + input = "&intlarhk" + output = [["Character", "&intlarhk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: intprod 
without a semi-colon" do + input = "&intprod" + output = [["Character", "&intprod"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iocy without a semi-colon" do + input = "&iocy" + output = [["Character", "&iocy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iogon without a semi-colon" do + input = "&iogon" + output = [["Character", "&iogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iopf without a semi-colon" do + input = "&iopf" + output = [["Character", "&iopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iota without a semi-colon" do + input = "&iota" + output = [["Character", "&iota"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iprod without a semi-colon" do + input = "&iprod" + output = [["Character", "&iprod"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iscr without a semi-colon" do + input = "&iscr" + output = [["Character", "&iscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: isin without a semi-colon" do + input = "&isin" + output = [["Character", "&isin"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: isinE without a semi-colon" do + input = "&isinE" + output = [["Character", "&isinE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: isindot without a semi-colon" do + input = "&isindot" + output = [["Character", "&isindot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: isins without a semi-colon" do + input = "&isins" + output = [["Character", "&isins"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: isinsv without a semi-colon" do + input = "&isinsv" + output = [["Character", "&isinsv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: isinv without a semi-colon" do + input = "&isinv" + output = [["Character", "&isinv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: it without a semi-colon" do + input = "&it" + output = [["Character", "&it"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: itilde without a semi-colon" do + input = "&itilde" + output = [["Character", "&itilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: iukcy without a semi-colon" do + input = "&iukcy" + output = [["Character", "&iukcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jcirc without a semi-colon" do + input = "&jcirc" + output = [["Character", "&jcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jcy without a semi-colon" do + input = "&jcy" + output = [["Character", "&jcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jfr without a semi-colon" do + input = "&jfr" + output = [["Character", "&jfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jmath without a semi-colon" do + input = "&jmath" + output = [["Character", "&jmath"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jopf without a semi-colon" do + input = "&jopf" + output = [["Character", "&jopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jscr without a semi-colon" do + input = "&jscr" + output = [["Character", "&jscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jsercy without a semi-colon" do + input = "&jsercy" + output = [["Character", "&jsercy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: jukcy without a semi-colon" do + input = "&jukcy" + output = [["Character", "&jukcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kappa without a semi-colon" do + input = "&kappa" + output = [["Character", "&kappa"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kappav without a semi-colon" do + input = "&kappav" + output = [["Character", "&kappav"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kcedil without a semi-colon" do + input = "&kcedil" + output = [["Character", "&kcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kcy without a semi-colon" do + input = "&kcy" 
+ output = [["Character", "&kcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kfr without a semi-colon" do + input = "&kfr" + output = [["Character", "&kfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kgreen without a semi-colon" do + input = "&kgreen" + output = [["Character", "&kgreen"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: khcy without a semi-colon" do + input = "&khcy" + output = [["Character", "&khcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kjcy without a semi-colon" do + input = "&kjcy" + output = [["Character", "&kjcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: kopf without a semi-colon" do + input = "&kopf" + output = [["Character", "&kopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part12_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part12_test.exs new file mode 100644 index 00000000..8a8f95c6 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part12_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart12Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: kscr without a semi-colon" do + input = "&kscr" + output = [["Character", "&kscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lAarr without a semi-colon" do + input = "&lAarr" + output = [["Character", "&lAarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lArr without a semi-colon" do + input = "&lArr" + output = [["Character", "&lArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lAtail without a semi-colon" do + input = "&lAtail" + output = [["Character", "&lAtail"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lBarr without a semi-colon" do + input = "&lBarr" + output = [["Character", "&lBarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lE without a semi-colon" do + input = "&lE" + output = [["Character", "&lE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lEg without a semi-colon" do + input = "&lEg" + output = [["Character", "&lEg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lHar without a semi-colon" do + input = "&lHar" + output = [["Character", "&lHar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lacute without a semi-colon" do + input = "&lacute" + output = [["Character", "&lacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: laemptyv without a semi-colon" do + input = "&laemptyv" + output = [["Character", "&laemptyv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lagran without a semi-colon" do + input = "&lagran" + output = [["Character", "&lagran"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lambda without a semi-colon" do + input = "&lambda" + output = [["Character", "&lambda"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lang without a semi-colon" do + input = "&lang" + output = [["Character", "&lang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: langd without a semi-colon" do + 
input = "&langd" + output = [["Character", "&langd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: langle without a semi-colon" do + input = "&langle" + output = [["Character", "&langle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lap without a semi-colon" do + input = "&lap" + output = [["Character", "&lap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larr without a semi-colon" do + input = "&larr" + output = [["Character", "&larr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrb without a semi-colon" do + input = "&larrb" + output = [["Character", "&larrb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrbfs without a semi-colon" do + input = "&larrbfs" + output = [["Character", "&larrbfs"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrfs without a semi-colon" do + input = "&larrfs" + output = [["Character", "&larrfs"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrhk without a semi-colon" do + input = "&larrhk" + output = [["Character", "&larrhk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrlp without a semi-colon" do + input = "&larrlp" + output = [["Character", "&larrlp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrpl without a semi-colon" do + input = "&larrpl" + output = [["Character", "&larrpl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrsim without a semi-colon" do + input = "&larrsim" + output = [["Character", "&larrsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: larrtl without a semi-colon" do + input = "&larrtl" + output = [["Character", "&larrtl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lat without a semi-colon" do + input = "&lat" + output = [["Character", "&lat"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: latail without a semi-colon" do + input = "&latail" + output = [["Character", "&latail"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: late without a semi-colon" do + input = "&late" + output = [["Character", "&late"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lates without a semi-colon" do + input = "&lates" + output = [["Character", "&lates"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lbarr without a semi-colon" do + input = "&lbarr" + output = [["Character", "&lbarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lbbrk without a semi-colon" do + input = "&lbbrk" + output = [["Character", "&lbbrk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lbrace without a semi-colon" do + input = "&lbrace" + output = [["Character", "&lbrace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lbrack without a semi-colon" do + input = "&lbrack" + output = [["Character", "&lbrack"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lbrke without a semi-colon" do + input = "&lbrke" + output = [["Character", "&lbrke"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lbrksld without a semi-colon" do + input = "&lbrksld" + output = [["Character", "&lbrksld"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lbrkslu without a semi-colon" do + input = "&lbrkslu" + output = [["Character", "&lbrkslu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lcaron without a semi-colon" do + input = "&lcaron" + output = [["Character", "&lcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lcedil without a semi-colon" do + input = "&lcedil" + output = [["Character", "&lcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lceil without a semi-colon" do + input = "&lceil" + output = [["Character", "&lceil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lcub without a semi-colon" do + input = "&lcub" + output = [["Character", "&lcub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lcy without a 
semi-colon" do + input = "&lcy" + output = [["Character", "&lcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ldca without a semi-colon" do + input = "&ldca" + output = [["Character", "&ldca"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ldquo without a semi-colon" do + input = "&ldquo" + output = [["Character", "&ldquo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ldquor without a semi-colon" do + input = "&ldquor" + output = [["Character", "&ldquor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ldrdhar without a semi-colon" do + input = "&ldrdhar" + output = [["Character", "&ldrdhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ldrushar without a semi-colon" do + input = "&ldrushar" + output = [["Character", "&ldrushar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ldsh without a semi-colon" do + input = "&ldsh" + output = [["Character", "&ldsh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: le without a semi-colon" do + input = "&le" + output = [["Character", "&le"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftarrow without a semi-colon" do + input = "&leftarrow" + output = [["Character", "&leftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftarrowtail without a semi-colon" do + input = "&leftarrowtail" + output = [["Character", "&leftarrowtail"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftharpoondown without a semi-colon" do + input = "&leftharpoondown" + output = [["Character", "&leftharpoondown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftharpoonup without a semi-colon" do + input = "&leftharpoonup" + output = [["Character", "&leftharpoonup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftleftarrows without a semi-colon" do + input = "&leftleftarrows" + output = [["Character", "&leftleftarrows"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftrightarrow without a semi-colon" do + input = 
"&leftrightarrow" + output = [["Character", "&leftrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftrightarrows without a semi-colon" do + input = "&leftrightarrows" + output = [["Character", "&leftrightarrows"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftrightharpoons without a semi-colon" do + input = "&leftrightharpoons" + output = [["Character", "&leftrightharpoons"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftrightsquigarrow without a semi-colon" do + input = "&leftrightsquigarrow" + output = [["Character", "&leftrightsquigarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leftthreetimes without a semi-colon" do + input = "&leftthreetimes" + output = [["Character", "&leftthreetimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leg without a semi-colon" do + input = "&leg" + output = [["Character", "&leg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leq without a semi-colon" do + input = "&leq" + output = [["Character", "&leq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leqq without a semi-colon" do + input = "&leqq" + output = [["Character", "&leqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: leqslant without a semi-colon" do + input = "&leqslant" + output = [["Character", "&leqslant"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: les without a semi-colon" do + input = "&les" + output = [["Character", "&les"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lescc without a semi-colon" do + input = "&lescc" + output = [["Character", "&lescc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesdot without a semi-colon" do + input = "&lesdot" + output = [["Character", "&lesdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesdoto without a semi-colon" do + input = "&lesdoto" + output = [["Character", "&lesdoto"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesdotor without a semi-colon" do + input = 
"&lesdotor" + output = [["Character", "&lesdotor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesg without a semi-colon" do + input = "&lesg" + output = [["Character", "&lesg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesges without a semi-colon" do + input = "&lesges" + output = [["Character", "&lesges"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lessapprox without a semi-colon" do + input = "&lessapprox" + output = [["Character", "&lessapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lessdot without a semi-colon" do + input = "&lessdot" + output = [["Character", "&lessdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesseqgtr without a semi-colon" do + input = "&lesseqgtr" + output = [["Character", "&lesseqgtr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesseqqgtr without a semi-colon" do + input = "&lesseqqgtr" + output = [["Character", "&lesseqqgtr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lessgtr without a semi-colon" do + input = "&lessgtr" + output = [["Character", "&lessgtr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lesssim without a semi-colon" do + input = "&lesssim" + output = [["Character", "&lesssim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lfisht without a semi-colon" do + input = "&lfisht" + output = [["Character", "&lfisht"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lfloor without a semi-colon" do + input = "&lfloor" + output = [["Character", "&lfloor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lfr without a semi-colon" do + input = "&lfr" + output = [["Character", "&lfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lg without a semi-colon" do + input = "&lg" + output = [["Character", "&lg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lgE without a semi-colon" do + input = "&lgE" + output = [["Character", "&lgE"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lhard without a semi-colon" do + input = "&lhard" + output = [["Character", "&lhard"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lharu without a semi-colon" do + input = "&lharu" + output = [["Character", "&lharu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lharul without a semi-colon" do + input = "&lharul" + output = [["Character", "&lharul"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lhblk without a semi-colon" do + input = "&lhblk" + output = [["Character", "&lhblk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ljcy without a semi-colon" do + input = "&ljcy" + output = [["Character", "&ljcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ll without a semi-colon" do + input = "&ll" + output = [["Character", "&ll"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: llarr without a semi-colon" do + input = "&llarr" + output = [["Character", "&llarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: llcorner without a semi-colon" do + input = "&llcorner" + output = [["Character", "&llcorner"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: llhard without a semi-colon" do + input = "&llhard" + output = [["Character", "&llhard"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lltri without a semi-colon" do + input = "&lltri" + output = [["Character", "&lltri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lmidot without a semi-colon" do + input = "&lmidot" + output = [["Character", "&lmidot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lmoust without a semi-colon" do + input = "&lmoust" + output = [["Character", "&lmoust"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lmoustache without a semi-colon" do + input = "&lmoustache" + output = [["Character", "&lmoustache"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lnE without a 
semi-colon" do + input = "&lnE" + output = [["Character", "&lnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lnap without a semi-colon" do + input = "&lnap" + output = [["Character", "&lnap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lnapprox without a semi-colon" do + input = "&lnapprox" + output = [["Character", "&lnapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lne without a semi-colon" do + input = "&lne" + output = [["Character", "&lne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lneq without a semi-colon" do + input = "&lneq" + output = [["Character", "&lneq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lneqq without a semi-colon" do + input = "&lneqq" + output = [["Character", "&lneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lnsim without a semi-colon" do + input = "&lnsim" + output = [["Character", "&lnsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part13_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part13_test.exs new file mode 100644 index 00000000..426af550 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part13_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart13Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: loang without a semi-colon" do + input = "&loang" + output = [["Character", "&loang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: loarr without a semi-colon" do + input = "&loarr" + output = [["Character", "&loarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lobrk without a semi-colon" do + input = "&lobrk" + output = [["Character", "&lobrk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: longleftarrow without a semi-colon" do + input = "&longleftarrow" + output = [["Character", "&longleftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: longleftrightarrow without a semi-colon" do + input = "&longleftrightarrow" + output = [["Character", "&longleftrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: longmapsto without a semi-colon" do + input = "&longmapsto" + output = [["Character", "&longmapsto"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: longrightarrow without a semi-colon" do + input = "&longrightarrow" + output = [["Character", "&longrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: looparrowleft without a semi-colon" do + input = "&looparrowleft" + output = [["Character", "&looparrowleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: looparrowright without a semi-colon" do + input = "&looparrowright" + output = [["Character", "&looparrowright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lopar without a semi-colon" do + input = "&lopar" + output = [["Character", "&lopar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lopf without a semi-colon" do + input = "&lopf" + output = [["Character", "&lopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: loplus without a semi-colon" do + input = "&loplus" + output = [["Character", "&loplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lotimes without a semi-colon" do + input = "&lotimes" + output = [["Character", "&lotimes"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lowast without a semi-colon" do + input = "&lowast" + output = [["Character", "&lowast"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lowbar without a semi-colon" do + input = "&lowbar" + output = [["Character", "&lowbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: loz without a semi-colon" do + input = "&loz" + output = [["Character", "&loz"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lozenge without a semi-colon" do + input = "&lozenge" + output = [["Character", "&lozenge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lozf without a semi-colon" do + input = "&lozf" + output = [["Character", "&lozf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lpar without a semi-colon" do + input = "&lpar" + output = [["Character", "&lpar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lparlt without a semi-colon" do + input = "&lparlt" + output = [["Character", "&lparlt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lrarr without a semi-colon" do + input = "&lrarr" + output = [["Character", "&lrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lrcorner without a semi-colon" do + input = "&lrcorner" + output = [["Character", "&lrcorner"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lrhar without a semi-colon" do + input = "&lrhar" + output = [["Character", "&lrhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lrhard without a semi-colon" do + input = "&lrhard" + output = [["Character", "&lrhard"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lrm without a semi-colon" do + input = "&lrm" + output = [["Character", "&lrm"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lrtri without a semi-colon" do + input = "&lrtri" + output = [["Character", "&lrtri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: 
lsaquo without a semi-colon" do + input = "&lsaquo" + output = [["Character", "&lsaquo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lscr without a semi-colon" do + input = "&lscr" + output = [["Character", "&lscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lsh without a semi-colon" do + input = "&lsh" + output = [["Character", "&lsh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lsim without a semi-colon" do + input = "&lsim" + output = [["Character", "&lsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lsime without a semi-colon" do + input = "&lsime" + output = [["Character", "&lsime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lsimg without a semi-colon" do + input = "&lsimg" + output = [["Character", "&lsimg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lsqb without a semi-colon" do + input = "&lsqb" + output = [["Character", "&lsqb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lsquo without a semi-colon" do + input = "&lsquo" + output = [["Character", "&lsquo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lsquor without a semi-colon" do + input = "&lsquor" + output = [["Character", "&lsquor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lstrok without a semi-colon" do + input = "&lstrok" + output = [["Character", "&lstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lurdshar without a semi-colon" do + input = "&lurdshar" + output = [["Character", "&lurdshar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: luruhar without a semi-colon" do + input = "&luruhar" + output = [["Character", "&luruhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lvertneqq without a semi-colon" do + input = "&lvertneqq" + output = [["Character", "&lvertneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: lvnE without a semi-colon" do + input = "&lvnE" + output = [["Character", "&lvnE"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mDDot without a semi-colon" do + input = "&mDDot" + output = [["Character", "&mDDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: male without a semi-colon" do + input = "&male" + output = [["Character", "&male"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: malt without a semi-colon" do + input = "&malt" + output = [["Character", "&malt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: maltese without a semi-colon" do + input = "&maltese" + output = [["Character", "&maltese"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: map without a semi-colon" do + input = "&map" + output = [["Character", "&map"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mapsto without a semi-colon" do + input = "&mapsto" + output = [["Character", "&mapsto"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mapstodown without a semi-colon" do + input = "&mapstodown" + output = [["Character", "&mapstodown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mapstoleft without a semi-colon" do + input = "&mapstoleft" + output = [["Character", "&mapstoleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mapstoup without a semi-colon" do + input = "&mapstoup" + output = [["Character", "&mapstoup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: marker without a semi-colon" do + input = "&marker" + output = [["Character", "&marker"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mcomma without a semi-colon" do + input = "&mcomma" + output = [["Character", "&mcomma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mcy without a semi-colon" do + input = "&mcy" + output = [["Character", "&mcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mdash without a semi-colon" do + input = "&mdash" + output = [["Character", "&mdash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: 
measuredangle without a semi-colon" do + input = "&measuredangle" + output = [["Character", "&measuredangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mfr without a semi-colon" do + input = "&mfr" + output = [["Character", "&mfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mho without a semi-colon" do + input = "&mho" + output = [["Character", "&mho"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mid without a semi-colon" do + input = "&mid" + output = [["Character", "&mid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: midast without a semi-colon" do + input = "&midast" + output = [["Character", "&midast"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: midcir without a semi-colon" do + input = "&midcir" + output = [["Character", "&midcir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: minus without a semi-colon" do + input = "&minus" + output = [["Character", "&minus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: minusb without a semi-colon" do + input = "&minusb" + output = [["Character", "&minusb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: minusd without a semi-colon" do + input = "&minusd" + output = [["Character", "&minusd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: minusdu without a semi-colon" do + input = "&minusdu" + output = [["Character", "&minusdu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mlcp without a semi-colon" do + input = "&mlcp" + output = [["Character", "&mlcp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mldr without a semi-colon" do + input = "&mldr" + output = [["Character", "&mldr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mnplus without a semi-colon" do + input = "&mnplus" + output = [["Character", "&mnplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: models without a semi-colon" do + input = "&models" + output = [["Character", "&models"]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mopf without a semi-colon" do + input = "&mopf" + output = [["Character", "&mopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mp without a semi-colon" do + input = "&mp" + output = [["Character", "&mp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mscr without a semi-colon" do + input = "&mscr" + output = [["Character", "&mscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mstpos without a semi-colon" do + input = "&mstpos" + output = [["Character", "&mstpos"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mu without a semi-colon" do + input = "&mu" + output = [["Character", "&mu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: multimap without a semi-colon" do + input = "&multimap" + output = [["Character", "&multimap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: mumap without a semi-colon" do + input = "&mumap" + output = [["Character", "&mumap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nGg without a semi-colon" do + input = "&nGg" + output = [["Character", "&nGg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nGt without a semi-colon" do + input = "&nGt" + output = [["Character", "&nGt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nGtv without a semi-colon" do + input = "&nGtv" + output = [["Character", "&nGtv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nLeftarrow without a semi-colon" do + input = "&nLeftarrow" + output = [["Character", "&nLeftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nLeftrightarrow without a semi-colon" do + input = "&nLeftrightarrow" + output = [["Character", "&nLeftrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nLl without a semi-colon" do + input = "&nLl" + output = [["Character", "&nLl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nLt without a 
semi-colon" do + input = "&nLt" + output = [["Character", "&nLt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nLtv without a semi-colon" do + input = "&nLtv" + output = [["Character", "&nLtv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nRightarrow without a semi-colon" do + input = "&nRightarrow" + output = [["Character", "&nRightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nVDash without a semi-colon" do + input = "&nVDash" + output = [["Character", "&nVDash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nVdash without a semi-colon" do + input = "&nVdash" + output = [["Character", "&nVdash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nabla without a semi-colon" do + input = "&nabla" + output = [["Character", "&nabla"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nacute without a semi-colon" do + input = "&nacute" + output = [["Character", "&nacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nang without a semi-colon" do + input = "&nang" + output = [["Character", "&nang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nap without a semi-colon" do + input = "&nap" + output = [["Character", "&nap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: napE without a semi-colon" do + input = "&napE" + output = [["Character", "&napE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: napid without a semi-colon" do + input = "&napid" + output = [["Character", "&napid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: napos without a semi-colon" do + input = "&napos" + output = [["Character", "&napos"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: napprox without a semi-colon" do + input = "&napprox" + output = [["Character", "&napprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: natur without a semi-colon" do + input = "&natur" + output = [["Character", "&natur"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: natural without a semi-colon" do + input = "&natural" + output = [["Character", "&natural"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: naturals without a semi-colon" do + input = "&naturals" + output = [["Character", "&naturals"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nbump without a semi-colon" do + input = "&nbump" + output = [["Character", "&nbump"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nbumpe without a semi-colon" do + input = "&nbumpe" + output = [["Character", "&nbumpe"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ncap without a semi-colon" do + input = "&ncap" + output = [["Character", "&ncap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ncaron without a semi-colon" do + input = "&ncaron" + output = [["Character", "&ncaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part14_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part14_test.exs new file mode 100644 index 00000000..a766a7aa --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part14_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart14Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: ncedil without a semi-colon" do + input = "&ncedil" + output = [["Character", "&ncedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ncong without a semi-colon" do + input = "&ncong" + output = [["Character", "&ncong"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ncongdot without a semi-colon" do + input = "&ncongdot" + output = [["Character", "&ncongdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ncup without a semi-colon" do + input = "&ncup" + output = [["Character", "&ncup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ncy without a semi-colon" do + input = "&ncy" + output = [["Character", "&ncy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ndash without a semi-colon" do + input = "&ndash" + output = [["Character", "&ndash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ne without a semi-colon" do + input = "&ne" + output = [["Character", "&ne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: neArr without a semi-colon" do + input = "&neArr" + output = [["Character", "&neArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nearhk without a semi-colon" do + input = "&nearhk" + output = [["Character", "&nearhk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nearr without a semi-colon" do + input = "&nearr" + output = [["Character", "&nearr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nearrow without a semi-colon" do + input = "&nearrow" + output = [["Character", "&nearrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nedot without a semi-colon" do + input = "&nedot" + output = [["Character", "&nedot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nequiv without a semi-colon" do + input = "&nequiv" + output = [["Character", "&nequiv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nesear without a 
semi-colon" do + input = "&nesear" + output = [["Character", "&nesear"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nesim without a semi-colon" do + input = "&nesim" + output = [["Character", "&nesim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nexist without a semi-colon" do + input = "&nexist" + output = [["Character", "&nexist"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nexists without a semi-colon" do + input = "&nexists" + output = [["Character", "&nexists"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nfr without a semi-colon" do + input = "&nfr" + output = [["Character", "&nfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ngE without a semi-colon" do + input = "&ngE" + output = [["Character", "&ngE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nge without a semi-colon" do + input = "&nge" + output = [["Character", "&nge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ngeq without a semi-colon" do + input = "&ngeq" + output = [["Character", "&ngeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ngeqq without a semi-colon" do + input = "&ngeqq" + output = [["Character", "&ngeqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ngeqslant without a semi-colon" do + input = "&ngeqslant" + output = [["Character", "&ngeqslant"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nges without a semi-colon" do + input = "&nges" + output = [["Character", "&nges"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ngsim without a semi-colon" do + input = "&ngsim" + output = [["Character", "&ngsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ngt without a semi-colon" do + input = "&ngt" + output = [["Character", "&ngt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ngtr without a semi-colon" do + input = "&ngtr" + output = [["Character", "&ngtr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nhArr without a semi-colon" do + input = "&nhArr" + output = [["Character", "&nhArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nharr without a semi-colon" do + input = "&nharr" + output = [["Character", "&nharr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nhpar without a semi-colon" do + input = "&nhpar" + output = [["Character", "&nhpar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ni without a semi-colon" do + input = "&ni" + output = [["Character", "&ni"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nis without a semi-colon" do + input = "&nis" + output = [["Character", "&nis"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nisd without a semi-colon" do + input = "&nisd" + output = [["Character", "&nisd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: niv without a semi-colon" do + input = "&niv" + output = [["Character", "&niv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: njcy without a semi-colon" do + input = "&njcy" + output = [["Character", "&njcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nlArr without a semi-colon" do + input = "&nlArr" + output = [["Character", "&nlArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nlE without a semi-colon" do + input = "&nlE" + output = [["Character", "&nlE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nlarr without a semi-colon" do + input = "&nlarr" + output = [["Character", "&nlarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nldr without a semi-colon" do + input = "&nldr" + output = [["Character", "&nldr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nle without a semi-colon" do + input = "&nle" + output = [["Character", "&nle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nleftarrow without a semi-colon" do + input = "&nleftarrow" + output = [["Character", "&nleftarrow"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nleftrightarrow without a semi-colon" do + input = "&nleftrightarrow" + output = [["Character", "&nleftrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nleq without a semi-colon" do + input = "&nleq" + output = [["Character", "&nleq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nleqq without a semi-colon" do + input = "&nleqq" + output = [["Character", "&nleqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nleqslant without a semi-colon" do + input = "&nleqslant" + output = [["Character", "&nleqslant"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nles without a semi-colon" do + input = "&nles" + output = [["Character", "&nles"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nless without a semi-colon" do + input = "&nless" + output = [["Character", "&nless"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nlsim without a semi-colon" do + input = "&nlsim" + output = [["Character", "&nlsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nlt without a semi-colon" do + input = "&nlt" + output = [["Character", "&nlt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nltri without a semi-colon" do + input = "&nltri" + output = [["Character", "&nltri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nltrie without a semi-colon" do + input = "&nltrie" + output = [["Character", "&nltrie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nmid without a semi-colon" do + input = "&nmid" + output = [["Character", "&nmid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nopf without a semi-colon" do + input = "&nopf" + output = [["Character", "&nopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: npar without a semi-colon" do + input = "&npar" + output = [["Character", "&npar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad 
named entity: nparallel without a semi-colon" do + input = "&nparallel" + output = [["Character", "&nparallel"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nparsl without a semi-colon" do + input = "&nparsl" + output = [["Character", "&nparsl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: npart without a semi-colon" do + input = "&npart" + output = [["Character", "&npart"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: npolint without a semi-colon" do + input = "&npolint" + output = [["Character", "&npolint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: npr without a semi-colon" do + input = "&npr" + output = [["Character", "&npr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nprcue without a semi-colon" do + input = "&nprcue" + output = [["Character", "&nprcue"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: npre without a semi-colon" do + input = "&npre" + output = [["Character", "&npre"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nprec without a semi-colon" do + input = "&nprec" + output = [["Character", "&nprec"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: npreceq without a semi-colon" do + input = "&npreceq" + output = [["Character", "&npreceq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nrArr without a semi-colon" do + input = "&nrArr" + output = [["Character", "&nrArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nrarr without a semi-colon" do + input = "&nrarr" + output = [["Character", "&nrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nrarrc without a semi-colon" do + input = "&nrarrc" + output = [["Character", "&nrarrc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nrarrw without a semi-colon" do + input = "&nrarrw" + output = [["Character", "&nrarrw"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nrightarrow without a semi-colon" do + input = "&nrightarrow" + output = [["Character", "&nrightarrow"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nrtri without a semi-colon" do + input = "&nrtri" + output = [["Character", "&nrtri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nrtrie without a semi-colon" do + input = "&nrtrie" + output = [["Character", "&nrtrie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsc without a semi-colon" do + input = "&nsc" + output = [["Character", "&nsc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsccue without a semi-colon" do + input = "&nsccue" + output = [["Character", "&nsccue"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsce without a semi-colon" do + input = "&nsce" + output = [["Character", "&nsce"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nscr without a semi-colon" do + input = "&nscr" + output = [["Character", "&nscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nshortmid without a semi-colon" do + input = "&nshortmid" + output = [["Character", "&nshortmid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nshortparallel without a semi-colon" do + input = "&nshortparallel" + output = [["Character", "&nshortparallel"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsim without a semi-colon" do + input = "&nsim" + output = [["Character", "&nsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsime without a semi-colon" do + input = "&nsime" + output = [["Character", "&nsime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsimeq without a semi-colon" do + input = "&nsimeq" + output = [["Character", "&nsimeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsmid without a semi-colon" do + input = "&nsmid" + output = [["Character", "&nsmid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nspar without a semi-colon" do + input = "&nspar" + output = [["Character", "&nspar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Bad named entity: nsqsube without a semi-colon" do + input = "&nsqsube" + output = [["Character", "&nsqsube"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsqsupe without a semi-colon" do + input = "&nsqsupe" + output = [["Character", "&nsqsupe"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsub without a semi-colon" do + input = "&nsub" + output = [["Character", "&nsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsubE without a semi-colon" do + input = "&nsubE" + output = [["Character", "&nsubE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsube without a semi-colon" do + input = "&nsube" + output = [["Character", "&nsube"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsubset without a semi-colon" do + input = "&nsubset" + output = [["Character", "&nsubset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsubseteq without a semi-colon" do + input = "&nsubseteq" + output = [["Character", "&nsubseteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsubseteqq without a semi-colon" do + input = "&nsubseteqq" + output = [["Character", "&nsubseteqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsucc without a semi-colon" do + input = "&nsucc" + output = [["Character", "&nsucc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsucceq without a semi-colon" do + input = "&nsucceq" + output = [["Character", "&nsucceq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsup without a semi-colon" do + input = "&nsup" + output = [["Character", "&nsup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsupE without a semi-colon" do + input = "&nsupE" + output = [["Character", "&nsupE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsupe without a semi-colon" do + input = "&nsupe" + output = [["Character", "&nsupe"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsupset without a semi-colon" do + input = "&nsupset" + output = [["Character", 
"&nsupset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsupseteq without a semi-colon" do + input = "&nsupseteq" + output = [["Character", "&nsupseteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nsupseteqq without a semi-colon" do + input = "&nsupseteqq" + output = [["Character", "&nsupseteqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ntgl without a semi-colon" do + input = "&ntgl" + output = [["Character", "&ntgl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ntlg without a semi-colon" do + input = "&ntlg" + output = [["Character", "&ntlg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ntriangleleft without a semi-colon" do + input = "&ntriangleleft" + output = [["Character", "&ntriangleleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part15_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part15_test.exs new file mode 100644 index 00000000..103af700 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part15_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart15Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: ntrianglelefteq without a semi-colon" do + input = "&ntrianglelefteq" + output = [["Character", "&ntrianglelefteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ntriangleright without a semi-colon" do + input = "&ntriangleright" + output = [["Character", "&ntriangleright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ntrianglerighteq without a semi-colon" do + input = "&ntrianglerighteq" + output = [["Character", "&ntrianglerighteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nu without a semi-colon" do + input = "&nu" + output = [["Character", "&nu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: num without a semi-colon" do + input = "&num" + output = [["Character", "&num"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: numero without a semi-colon" do + input = "&numero" + output = [["Character", "&numero"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: numsp without a semi-colon" do + input = "&numsp" + output = [["Character", "&numsp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvDash without a semi-colon" do + input = "&nvDash" + output = [["Character", "&nvDash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvHarr without a semi-colon" do + input = "&nvHarr" + output = [["Character", "&nvHarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvap without a semi-colon" do + input = "&nvap" + output = [["Character", "&nvap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvdash without a semi-colon" do + input = "&nvdash" + output = [["Character", "&nvdash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvge without a semi-colon" do + input = "&nvge" + output = [["Character", "&nvge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvgt without a semi-colon" do + input = "&nvgt" + output = [["Character", "&nvgt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == 
output + end + + test "tokenize/1 Bad named entity: nvinfin without a semi-colon" do + input = "&nvinfin" + output = [["Character", "&nvinfin"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvlArr without a semi-colon" do + input = "&nvlArr" + output = [["Character", "&nvlArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvle without a semi-colon" do + input = "&nvle" + output = [["Character", "&nvle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvlt without a semi-colon" do + input = "&nvlt" + output = [["Character", "&nvlt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvltrie without a semi-colon" do + input = "&nvltrie" + output = [["Character", "&nvltrie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvrArr without a semi-colon" do + input = "&nvrArr" + output = [["Character", "&nvrArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvrtrie without a semi-colon" do + input = "&nvrtrie" + output = [["Character", "&nvrtrie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nvsim without a semi-colon" do + input = "&nvsim" + output = [["Character", "&nvsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nwArr without a semi-colon" do + input = "&nwArr" + output = [["Character", "&nwArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nwarhk without a semi-colon" do + input = "&nwarhk" + output = [["Character", "&nwarhk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nwarr without a semi-colon" do + input = "&nwarr" + output = [["Character", "&nwarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nwarrow without a semi-colon" do + input = "&nwarrow" + output = [["Character", "&nwarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: nwnear without a semi-colon" do + input = "&nwnear" + output = [["Character", "&nwnear"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oS without a semi-colon" do + input = "&oS" + output = [["Character", "&oS"]] + 
+ result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oast without a semi-colon" do + input = "&oast" + output = [["Character", "&oast"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ocir without a semi-colon" do + input = "&ocir" + output = [["Character", "&ocir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ocy without a semi-colon" do + input = "&ocy" + output = [["Character", "&ocy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: odash without a semi-colon" do + input = "&odash" + output = [["Character", "&odash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: odblac without a semi-colon" do + input = "&odblac" + output = [["Character", "&odblac"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: odiv without a semi-colon" do + input = "&odiv" + output = [["Character", "&odiv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: odot without a semi-colon" do + input = "&odot" + output = [["Character", "&odot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: odsold without a semi-colon" do + input = "&odsold" + output = [["Character", "&odsold"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oelig without a semi-colon" do + input = "&oelig" + output = [["Character", "&oelig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ofcir without a semi-colon" do + input = "&ofcir" + output = [["Character", "&ofcir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ofr without a semi-colon" do + input = "&ofr" + output = [["Character", "&ofr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ogon without a semi-colon" do + input = "&ogon" + output = [["Character", "&ogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ogt without a semi-colon" do + input = "&ogt" + output = [["Character", "&ogt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ohbar without a 
semi-colon" do + input = "&ohbar" + output = [["Character", "&ohbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ohm without a semi-colon" do + input = "&ohm" + output = [["Character", "&ohm"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oint without a semi-colon" do + input = "&oint" + output = [["Character", "&oint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: olarr without a semi-colon" do + input = "&olarr" + output = [["Character", "&olarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: olcir without a semi-colon" do + input = "&olcir" + output = [["Character", "&olcir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: olcross without a semi-colon" do + input = "&olcross" + output = [["Character", "&olcross"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oline without a semi-colon" do + input = "&oline" + output = [["Character", "&oline"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: olt without a semi-colon" do + input = "&olt" + output = [["Character", "&olt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: omacr without a semi-colon" do + input = "&omacr" + output = [["Character", "&omacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: omega without a semi-colon" do + input = "&omega" + output = [["Character", "&omega"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: omicron without a semi-colon" do + input = "&omicron" + output = [["Character", "&omicron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: omid without a semi-colon" do + input = "&omid" + output = [["Character", "&omid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ominus without a semi-colon" do + input = "&ominus" + output = [["Character", "&ominus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oopf without a semi-colon" do + input = "&oopf" + output = [["Character", "&oopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + 
+ assert result.tokens == output + end + + test "tokenize/1 Bad named entity: opar without a semi-colon" do + input = "&opar" + output = [["Character", "&opar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: operp without a semi-colon" do + input = "&operp" + output = [["Character", "&operp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oplus without a semi-colon" do + input = "&oplus" + output = [["Character", "&oplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: or without a semi-colon" do + input = "&or" + output = [["Character", "&or"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: orarr without a semi-colon" do + input = "&orarr" + output = [["Character", "&orarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ord without a semi-colon" do + input = "&ord" + output = [["Character", "&ord"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: order without a semi-colon" do + input = "&order" + output = [["Character", "&order"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: orderof without a semi-colon" do + input = "&orderof" + output = [["Character", "&orderof"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: origof without a semi-colon" do + input = "&origof" + output = [["Character", "&origof"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oror without a semi-colon" do + input = "&oror" + output = [["Character", "&oror"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: orslope without a semi-colon" do + input = "&orslope" + output = [["Character", "&orslope"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: orv without a semi-colon" do + input = "&orv" + output = [["Character", "&orv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: oscr without a semi-colon" do + input = "&oscr" + output = [["Character", "&oscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: osol without a semi-colon" do + input = "&osol" + output = [["Character", "&osol"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: otimes without a semi-colon" do + input = "&otimes" + output = [["Character", "&otimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: otimesas without a semi-colon" do + input = "&otimesas" + output = [["Character", "&otimesas"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ovbar without a semi-colon" do + input = "&ovbar" + output = [["Character", "&ovbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: par without a semi-colon" do + input = "&par" + output = [["Character", "&par"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: parsim without a semi-colon" do + input = "&parsim" + output = [["Character", "&parsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: parsl without a semi-colon" do + input = "&parsl" + output = [["Character", "&parsl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: part without a semi-colon" do + input = "&part" + output = [["Character", "&part"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pcy without a semi-colon" do + input = "&pcy" + output = [["Character", "&pcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: percnt without a semi-colon" do + input = "&percnt" + output = [["Character", "&percnt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: period without a semi-colon" do + input = "&period" + output = [["Character", "&period"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: permil without a semi-colon" do + input = "&permil" + output = [["Character", "&permil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: perp without a semi-colon" do + input = "&perp" + output = [["Character", "&perp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pertenk without a semi-colon" do + input = "&pertenk" + output = [["Character", "&pertenk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad 
named entity: pfr without a semi-colon" do + input = "&pfr" + output = [["Character", "&pfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: phi without a semi-colon" do + input = "&phi" + output = [["Character", "&phi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: phiv without a semi-colon" do + input = "&phiv" + output = [["Character", "&phiv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: phmmat without a semi-colon" do + input = "&phmmat" + output = [["Character", "&phmmat"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: phone without a semi-colon" do + input = "&phone" + output = [["Character", "&phone"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pi without a semi-colon" do + input = "&pi" + output = [["Character", "&pi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pitchfork without a semi-colon" do + input = "&pitchfork" + output = [["Character", "&pitchfork"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: piv without a semi-colon" do + input = "&piv" + output = [["Character", "&piv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: planck without a semi-colon" do + input = "&planck" + output = [["Character", "&planck"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: planckh without a semi-colon" do + input = "&planckh" + output = [["Character", "&planckh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: plankv without a semi-colon" do + input = "&plankv" + output = [["Character", "&plankv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: plus without a semi-colon" do + input = "&plus" + output = [["Character", "&plus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: plusacir without a semi-colon" do + input = "&plusacir" + output = [["Character", "&plusacir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: plusb without a semi-colon" do + input = "&plusb" + output = [["Character", "&plusb"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pluscir without a semi-colon" do + input = "&pluscir" + output = [["Character", "&pluscir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: plusdo without a semi-colon" do + input = "&plusdo" + output = [["Character", "&plusdo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: plusdu without a semi-colon" do + input = "&plusdu" + output = [["Character", "&plusdu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pluse without a semi-colon" do + input = "&pluse" + output = [["Character", "&pluse"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: plussim without a semi-colon" do + input = "&plussim" + output = [["Character", "&plussim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part16_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part16_test.exs new file mode 100644 index 00000000..33e1de77 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part16_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart16Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: plustwo without a semi-colon" do + input = "&plustwo" + output = [["Character", "&plustwo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pm without a semi-colon" do + input = "&pm" + output = [["Character", "&pm"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pointint without a semi-colon" do + input = "&pointint" + output = [["Character", "&pointint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: popf without a semi-colon" do + input = "&popf" + output = [["Character", "&popf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pr without a semi-colon" do + input = "&pr" + output = [["Character", "&pr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prE without a semi-colon" do + input = "&prE" + output = [["Character", "&prE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prap without a semi-colon" do + input = "&prap" + output = [["Character", "&prap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prcue without a semi-colon" do + input = "&prcue" + output = [["Character", "&prcue"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pre without a semi-colon" do + input = "&pre" + output = [["Character", "&pre"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prec without a semi-colon" do + input = "&prec" + output = [["Character", "&prec"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: precapprox without a semi-colon" do + input = "&precapprox" + output = [["Character", "&precapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: preccurlyeq without a semi-colon" do + input = "&preccurlyeq" + output = [["Character", "&preccurlyeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: preceq without a semi-colon" do + input = "&preceq" + output = [["Character", "&preceq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: precnapprox 
without a semi-colon" do + input = "&precnapprox" + output = [["Character", "&precnapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: precneqq without a semi-colon" do + input = "&precneqq" + output = [["Character", "&precneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: precnsim without a semi-colon" do + input = "&precnsim" + output = [["Character", "&precnsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: precsim without a semi-colon" do + input = "&precsim" + output = [["Character", "&precsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prime without a semi-colon" do + input = "&prime" + output = [["Character", "&prime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: primes without a semi-colon" do + input = "&primes" + output = [["Character", "&primes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prnE without a semi-colon" do + input = "&prnE" + output = [["Character", "&prnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prnap without a semi-colon" do + input = "&prnap" + output = [["Character", "&prnap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prnsim without a semi-colon" do + input = "&prnsim" + output = [["Character", "&prnsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prod without a semi-colon" do + input = "&prod" + output = [["Character", "&prod"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: profalar without a semi-colon" do + input = "&profalar" + output = [["Character", "&profalar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: profline without a semi-colon" do + input = "&profline" + output = [["Character", "&profline"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: profsurf without a semi-colon" do + input = "&profsurf" + output = [["Character", "&profsurf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prop without a semi-colon" do + input = "&prop" + output = [["Character", "&prop"]] + + result = + input 
+ |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: propto without a semi-colon" do + input = "&propto" + output = [["Character", "&propto"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prsim without a semi-colon" do + input = "&prsim" + output = [["Character", "&prsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: prurel without a semi-colon" do + input = "&prurel" + output = [["Character", "&prurel"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: pscr without a semi-colon" do + input = "&pscr" + output = [["Character", "&pscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: psi without a semi-colon" do + input = "&psi" + output = [["Character", "&psi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: puncsp without a semi-colon" do + input = "&puncsp" + output = [["Character", "&puncsp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: qfr without a semi-colon" do + input = "&qfr" + output = [["Character", "&qfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: qint without a semi-colon" do + input = "&qint" + output = [["Character", "&qint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: qopf without a semi-colon" do + input = "&qopf" + output = [["Character", "&qopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: qprime without a semi-colon" do + input = "&qprime" + output = [["Character", "&qprime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: qscr without a semi-colon" do + input = "&qscr" + output = [["Character", "&qscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: quaternions without a semi-colon" do + input = "&quaternions" + output = [["Character", "&quaternions"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: quatint without a semi-colon" do + input = "&quatint" + output = [["Character", "&quatint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named 
entity: quest without a semi-colon" do + input = "&quest" + output = [["Character", "&quest"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: questeq without a semi-colon" do + input = "&questeq" + output = [["Character", "&questeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rAarr without a semi-colon" do + input = "&rAarr" + output = [["Character", "&rAarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rArr without a semi-colon" do + input = "&rArr" + output = [["Character", "&rArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rAtail without a semi-colon" do + input = "&rAtail" + output = [["Character", "&rAtail"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rBarr without a semi-colon" do + input = "&rBarr" + output = [["Character", "&rBarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rHar without a semi-colon" do + input = "&rHar" + output = [["Character", "&rHar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: race without a semi-colon" do + input = "&race" + output = [["Character", "&race"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: racute without a semi-colon" do + input = "&racute" + output = [["Character", "&racute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: radic without a semi-colon" do + input = "&radic" + output = [["Character", "&radic"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: raemptyv without a semi-colon" do + input = "&raemptyv" + output = [["Character", "&raemptyv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rang without a semi-colon" do + input = "&rang" + output = [["Character", "&rang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rangd without a semi-colon" do + input = "&rangd" + output = [["Character", "&rangd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: range without a semi-colon" do + input = "&range" + output = [["Character", "&range"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rangle without a semi-colon" do + input = "&rangle" + output = [["Character", "&rangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarr without a semi-colon" do + input = "&rarr" + output = [["Character", "&rarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrap without a semi-colon" do + input = "&rarrap" + output = [["Character", "&rarrap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrb without a semi-colon" do + input = "&rarrb" + output = [["Character", "&rarrb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrbfs without a semi-colon" do + input = "&rarrbfs" + output = [["Character", "&rarrbfs"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrc without a semi-colon" do + input = "&rarrc" + output = [["Character", "&rarrc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrfs without a semi-colon" do + input = "&rarrfs" + output = [["Character", "&rarrfs"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrhk without a semi-colon" do + input = "&rarrhk" + output = [["Character", "&rarrhk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrlp without a semi-colon" do + input = "&rarrlp" + output = [["Character", "&rarrlp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrpl without a semi-colon" do + input = "&rarrpl" + output = [["Character", "&rarrpl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrsim without a semi-colon" do + input = "&rarrsim" + output = [["Character", "&rarrsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrtl without a semi-colon" do + input = "&rarrtl" + output = [["Character", "&rarrtl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rarrw without a semi-colon" do + input = "&rarrw" + output = [["Character", "&rarrw"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ratail 
without a semi-colon" do + input = "&ratail" + output = [["Character", "&ratail"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ratio without a semi-colon" do + input = "&ratio" + output = [["Character", "&ratio"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rationals without a semi-colon" do + input = "&rationals" + output = [["Character", "&rationals"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rbarr without a semi-colon" do + input = "&rbarr" + output = [["Character", "&rbarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rbbrk without a semi-colon" do + input = "&rbbrk" + output = [["Character", "&rbbrk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rbrace without a semi-colon" do + input = "&rbrace" + output = [["Character", "&rbrace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rbrack without a semi-colon" do + input = "&rbrack" + output = [["Character", "&rbrack"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rbrke without a semi-colon" do + input = "&rbrke" + output = [["Character", "&rbrke"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rbrksld without a semi-colon" do + input = "&rbrksld" + output = [["Character", "&rbrksld"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rbrkslu without a semi-colon" do + input = "&rbrkslu" + output = [["Character", "&rbrkslu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rcaron without a semi-colon" do + input = "&rcaron" + output = [["Character", "&rcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rcedil without a semi-colon" do + input = "&rcedil" + output = [["Character", "&rcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rceil without a semi-colon" do + input = "&rceil" + output = [["Character", "&rceil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rcub without a semi-colon" do + input = "&rcub" + output = [["Character", "&rcub"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rcy without a semi-colon" do + input = "&rcy" + output = [["Character", "&rcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rdca without a semi-colon" do + input = "&rdca" + output = [["Character", "&rdca"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rdldhar without a semi-colon" do + input = "&rdldhar" + output = [["Character", "&rdldhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rdquo without a semi-colon" do + input = "&rdquo" + output = [["Character", "&rdquo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rdquor without a semi-colon" do + input = "&rdquor" + output = [["Character", "&rdquor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rdsh without a semi-colon" do + input = "&rdsh" + output = [["Character", "&rdsh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: real without a semi-colon" do + input = "&real" + output = [["Character", "&real"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: realine without a semi-colon" do + input = "&realine" + output = [["Character", "&realine"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: realpart without a semi-colon" do + input = "&realpart" + output = [["Character", "&realpart"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: reals without a semi-colon" do + input = "&reals" + output = [["Character", "&reals"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rect without a semi-colon" do + input = "&rect" + output = [["Character", "&rect"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rfisht without a semi-colon" do + input = "&rfisht" + output = [["Character", "&rfisht"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rfloor without a semi-colon" do + input = "&rfloor" + output = [["Character", "&rfloor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: 
rfr without a semi-colon" do + input = "&rfr" + output = [["Character", "&rfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rhard without a semi-colon" do + input = "&rhard" + output = [["Character", "&rhard"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rharu without a semi-colon" do + input = "&rharu" + output = [["Character", "&rharu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rharul without a semi-colon" do + input = "&rharul" + output = [["Character", "&rharul"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rho without a semi-colon" do + input = "&rho" + output = [["Character", "&rho"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rhov without a semi-colon" do + input = "&rhov" + output = [["Character", "&rhov"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part17_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part17_test.exs new file mode 100644 index 00000000..cda2ebd6 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part17_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart17Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: rightarrow without a semi-colon" do + input = "&rightarrow" + output = [["Character", "&rightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightarrowtail without a semi-colon" do + input = "&rightarrowtail" + output = [["Character", "&rightarrowtail"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightharpoondown without a semi-colon" do + input = "&rightharpoondown" + output = [["Character", "&rightharpoondown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightharpoonup without a semi-colon" do + input = "&rightharpoonup" + output = [["Character", "&rightharpoonup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightleftarrows without a semi-colon" do + input = "&rightleftarrows" + output = [["Character", "&rightleftarrows"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightleftharpoons without a semi-colon" do + input = "&rightleftharpoons" + output = [["Character", "&rightleftharpoons"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightrightarrows without a semi-colon" do + input = "&rightrightarrows" + output = [["Character", "&rightrightarrows"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightsquigarrow without a semi-colon" do + input = "&rightsquigarrow" + output = [["Character", "&rightsquigarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rightthreetimes without a semi-colon" do + input = "&rightthreetimes" + output = [["Character", "&rightthreetimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ring without a semi-colon" do + input = "&ring" + output = [["Character", "&ring"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: risingdotseq without a semi-colon" do + input = "&risingdotseq" + output = [["Character", "&risingdotseq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rlarr without a semi-colon" do + input = "&rlarr" + output = [["Character", "&rlarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rlhar 
without a semi-colon" do + input = "&rlhar" + output = [["Character", "&rlhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rlm without a semi-colon" do + input = "&rlm" + output = [["Character", "&rlm"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rmoust without a semi-colon" do + input = "&rmoust" + output = [["Character", "&rmoust"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rmoustache without a semi-colon" do + input = "&rmoustache" + output = [["Character", "&rmoustache"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rnmid without a semi-colon" do + input = "&rnmid" + output = [["Character", "&rnmid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: roang without a semi-colon" do + input = "&roang" + output = [["Character", "&roang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: roarr without a semi-colon" do + input = "&roarr" + output = [["Character", "&roarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: robrk without a semi-colon" do + input = "&robrk" + output = [["Character", "&robrk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ropar without a semi-colon" do + input = "&ropar" + output = [["Character", "&ropar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ropf without a semi-colon" do + input = "&ropf" + output = [["Character", "&ropf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: roplus without a semi-colon" do + input = "&roplus" + output = [["Character", "&roplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rotimes without a semi-colon" do + input = "&rotimes" + output = [["Character", "&rotimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rpar without a semi-colon" do + input = "&rpar" + output = [["Character", "&rpar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rpargt without a semi-colon" do + input = "&rpargt" + output = [["Character", "&rpargt"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rppolint without a semi-colon" do + input = "&rppolint" + output = [["Character", "&rppolint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rrarr without a semi-colon" do + input = "&rrarr" + output = [["Character", "&rrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rsaquo without a semi-colon" do + input = "&rsaquo" + output = [["Character", "&rsaquo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rscr without a semi-colon" do + input = "&rscr" + output = [["Character", "&rscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rsh without a semi-colon" do + input = "&rsh" + output = [["Character", "&rsh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rsqb without a semi-colon" do + input = "&rsqb" + output = [["Character", "&rsqb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rsquo without a semi-colon" do + input = "&rsquo" + output = [["Character", "&rsquo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rsquor without a semi-colon" do + input = "&rsquor" + output = [["Character", "&rsquor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rthree without a semi-colon" do + input = "&rthree" + output = [["Character", "&rthree"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rtimes without a semi-colon" do + input = "&rtimes" + output = [["Character", "&rtimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rtri without a semi-colon" do + input = "&rtri" + output = [["Character", "&rtri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rtrie without a semi-colon" do + input = "&rtrie" + output = [["Character", "&rtrie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rtrif without a semi-colon" do + input = "&rtrif" + output = [["Character", "&rtrif"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rtriltri without a semi-colon" do 
+ input = "&rtriltri" + output = [["Character", "&rtriltri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ruluhar without a semi-colon" do + input = "&ruluhar" + output = [["Character", "&ruluhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: rx without a semi-colon" do + input = "&rx" + output = [["Character", "&rx"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sacute without a semi-colon" do + input = "&sacute" + output = [["Character", "&sacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sbquo without a semi-colon" do + input = "&sbquo" + output = [["Character", "&sbquo"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sc without a semi-colon" do + input = "&sc" + output = [["Character", "&sc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scE without a semi-colon" do + input = "&scE" + output = [["Character", "&scE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scap without a semi-colon" do + input = "&scap" + output = [["Character", "&scap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scaron without a semi-colon" do + input = "&scaron" + output = [["Character", "&scaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sccue without a semi-colon" do + input = "&sccue" + output = [["Character", "&sccue"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sce without a semi-colon" do + input = "&sce" + output = [["Character", "&sce"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scedil without a semi-colon" do + input = "&scedil" + output = [["Character", "&scedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scirc without a semi-colon" do + input = "&scirc" + output = [["Character", "&scirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scnE without a semi-colon" do + input = "&scnE" + output = [["Character", "&scnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens 
== output + end + + test "tokenize/1 Bad named entity: scnap without a semi-colon" do + input = "&scnap" + output = [["Character", "&scnap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scnsim without a semi-colon" do + input = "&scnsim" + output = [["Character", "&scnsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scpolint without a semi-colon" do + input = "&scpolint" + output = [["Character", "&scpolint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scsim without a semi-colon" do + input = "&scsim" + output = [["Character", "&scsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: scy without a semi-colon" do + input = "&scy" + output = [["Character", "&scy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sdot without a semi-colon" do + input = "&sdot" + output = [["Character", "&sdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sdotb without a semi-colon" do + input = "&sdotb" + output = [["Character", "&sdotb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sdote without a semi-colon" do + input = "&sdote" + output = [["Character", "&sdote"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: seArr without a semi-colon" do + input = "&seArr" + output = [["Character", "&seArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: searhk without a semi-colon" do + input = "&searhk" + output = [["Character", "&searhk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: searr without a semi-colon" do + input = "&searr" + output = [["Character", "&searr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: searrow without a semi-colon" do + input = "&searrow" + output = [["Character", "&searrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: semi without a semi-colon" do + input = "&semi" + output = [["Character", "&semi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: seswar without a semi-colon" do + input = "&seswar" + output = [["Character", "&seswar"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: setminus without a semi-colon" do + input = "&setminus" + output = [["Character", "&setminus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: setmn without a semi-colon" do + input = "&setmn" + output = [["Character", "&setmn"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sext without a semi-colon" do + input = "&sext" + output = [["Character", "&sext"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sfr without a semi-colon" do + input = "&sfr" + output = [["Character", "&sfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sfrown without a semi-colon" do + input = "&sfrown" + output = [["Character", "&sfrown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sharp without a semi-colon" do + input = "&sharp" + output = [["Character", "&sharp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: shchcy without a semi-colon" do + input = "&shchcy" + output = [["Character", "&shchcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: shcy without a semi-colon" do + input = "&shcy" + output = [["Character", "&shcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: shortmid without a semi-colon" do + input = "&shortmid" + output = [["Character", "&shortmid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: shortparallel without a semi-colon" do + input = "&shortparallel" + output = [["Character", "&shortparallel"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sigma without a semi-colon" do + input = "&sigma" + output = [["Character", "&sigma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sigmaf without a semi-colon" do + input = "&sigmaf" + output = [["Character", "&sigmaf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sigmav without a semi-colon" do + input = "&sigmav" + output = [["Character", "&sigmav"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == 
output + end + + test "tokenize/1 Bad named entity: sim without a semi-colon" do + input = "&sim" + output = [["Character", "&sim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simdot without a semi-colon" do + input = "&simdot" + output = [["Character", "&simdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sime without a semi-colon" do + input = "&sime" + output = [["Character", "&sime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simeq without a semi-colon" do + input = "&simeq" + output = [["Character", "&simeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simg without a semi-colon" do + input = "&simg" + output = [["Character", "&simg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simgE without a semi-colon" do + input = "&simgE" + output = [["Character", "&simgE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: siml without a semi-colon" do + input = "&siml" + output = [["Character", "&siml"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simlE without a semi-colon" do + input = "&simlE" + output = [["Character", "&simlE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simne without a semi-colon" do + input = "&simne" + output = [["Character", "&simne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simplus without a semi-colon" do + input = "&simplus" + output = [["Character", "&simplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: simrarr without a semi-colon" do + input = "&simrarr" + output = [["Character", "&simrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: slarr without a semi-colon" do + input = "&slarr" + output = [["Character", "&slarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smallsetminus without a semi-colon" do + input = "&smallsetminus" + output = [["Character", "&smallsetminus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smashp without a semi-colon" do + input = "&smashp" + output = [["Character", 
"&smashp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smeparsl without a semi-colon" do + input = "&smeparsl" + output = [["Character", "&smeparsl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smid without a semi-colon" do + input = "&smid" + output = [["Character", "&smid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smile without a semi-colon" do + input = "&smile" + output = [["Character", "&smile"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smt without a semi-colon" do + input = "&smt" + output = [["Character", "&smt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smte without a semi-colon" do + input = "&smte" + output = [["Character", "&smte"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: smtes without a semi-colon" do + input = "&smtes" + output = [["Character", "&smtes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part18_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part18_test.exs new file mode 100644 index 00000000..541e67ea --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part18_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart18Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: softcy without a semi-colon" do + input = "&softcy" + output = [["Character", "&softcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sol without a semi-colon" do + input = "&sol" + output = [["Character", "&sol"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: solb without a semi-colon" do + input = "&solb" + output = [["Character", "&solb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: solbar without a semi-colon" do + input = "&solbar" + output = [["Character", "&solbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sopf without a semi-colon" do + input = "&sopf" + output = [["Character", "&sopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: spades without a semi-colon" do + input = "&spades" + output = [["Character", "&spades"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: spadesuit without a semi-colon" do + input = "&spadesuit" + output = [["Character", "&spadesuit"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: spar without a semi-colon" do + input = "&spar" + output = [["Character", "&spar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqcap without a semi-colon" do + input = "&sqcap" + output = [["Character", "&sqcap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqcaps without a semi-colon" do + input = "&sqcaps" + output = [["Character", "&sqcaps"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqcup without a semi-colon" do + input = "&sqcup" + output = [["Character", "&sqcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqcups without a semi-colon" do + input = "&sqcups" + output = [["Character", "&sqcups"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsub without a semi-colon" do + input = "&sqsub" + output = [["Character", "&sqsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsube without 
a semi-colon" do + input = "&sqsube" + output = [["Character", "&sqsube"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsubset without a semi-colon" do + input = "&sqsubset" + output = [["Character", "&sqsubset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsubseteq without a semi-colon" do + input = "&sqsubseteq" + output = [["Character", "&sqsubseteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsup without a semi-colon" do + input = "&sqsup" + output = [["Character", "&sqsup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsupe without a semi-colon" do + input = "&sqsupe" + output = [["Character", "&sqsupe"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsupset without a semi-colon" do + input = "&sqsupset" + output = [["Character", "&sqsupset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sqsupseteq without a semi-colon" do + input = "&sqsupseteq" + output = [["Character", "&sqsupseteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: squ without a semi-colon" do + input = "&squ" + output = [["Character", "&squ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: square without a semi-colon" do + input = "&square" + output = [["Character", "&square"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: squarf without a semi-colon" do + input = "&squarf" + output = [["Character", "&squarf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: squf without a semi-colon" do + input = "&squf" + output = [["Character", "&squf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: srarr without a semi-colon" do + input = "&srarr" + output = [["Character", "&srarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sscr without a semi-colon" do + input = "&sscr" + output = [["Character", "&sscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ssetmn without a semi-colon" do + input = "&ssetmn" + output = [["Character", "&ssetmn"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ssmile without a semi-colon" do + input = "&ssmile" + output = [["Character", "&ssmile"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sstarf without a semi-colon" do + input = "&sstarf" + output = [["Character", "&sstarf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: star without a semi-colon" do + input = "&star" + output = [["Character", "&star"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: starf without a semi-colon" do + input = "&starf" + output = [["Character", "&starf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: straightepsilon without a semi-colon" do + input = "&straightepsilon" + output = [["Character", "&straightepsilon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: straightphi without a semi-colon" do + input = "&straightphi" + output = [["Character", "&straightphi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: strns without a semi-colon" do + input = "&strns" + output = [["Character", "&strns"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sub without a semi-colon" do + input = "&sub" + output = [["Character", "&sub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subE without a semi-colon" do + input = "&subE" + output = [["Character", "&subE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subdot without a semi-colon" do + input = "&subdot" + output = [["Character", "&subdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sube without a semi-colon" do + input = "&sube" + output = [["Character", "&sube"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subedot without a semi-colon" do + input = "&subedot" + output = [["Character", "&subedot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: submult without a semi-colon" do + input = "&submult" + output = [["Character", "&submult"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + 
end + + test "tokenize/1 Bad named entity: subnE without a semi-colon" do + input = "&subnE" + output = [["Character", "&subnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subne without a semi-colon" do + input = "&subne" + output = [["Character", "&subne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subplus without a semi-colon" do + input = "&subplus" + output = [["Character", "&subplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subrarr without a semi-colon" do + input = "&subrarr" + output = [["Character", "&subrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subset without a semi-colon" do + input = "&subset" + output = [["Character", "&subset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subseteq without a semi-colon" do + input = "&subseteq" + output = [["Character", "&subseteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subseteqq without a semi-colon" do + input = "&subseteqq" + output = [["Character", "&subseteqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subsetneq without a semi-colon" do + input = "&subsetneq" + output = [["Character", "&subsetneq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subsetneqq without a semi-colon" do + input = "&subsetneqq" + output = [["Character", "&subsetneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subsim without a semi-colon" do + input = "&subsim" + output = [["Character", "&subsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subsub without a semi-colon" do + input = "&subsub" + output = [["Character", "&subsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: subsup without a semi-colon" do + input = "&subsup" + output = [["Character", "&subsup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succ without a semi-colon" do + input = "&succ" + output = [["Character", "&succ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succapprox without a semi-colon" do + input = 
"&succapprox" + output = [["Character", "&succapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succcurlyeq without a semi-colon" do + input = "&succcurlyeq" + output = [["Character", "&succcurlyeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succeq without a semi-colon" do + input = "&succeq" + output = [["Character", "&succeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succnapprox without a semi-colon" do + input = "&succnapprox" + output = [["Character", "&succnapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succneqq without a semi-colon" do + input = "&succneqq" + output = [["Character", "&succneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succnsim without a semi-colon" do + input = "&succnsim" + output = [["Character", "&succnsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: succsim without a semi-colon" do + input = "&succsim" + output = [["Character", "&succsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sum without a semi-colon" do + input = "&sum" + output = [["Character", "&sum"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sung without a semi-colon" do + input = "&sung" + output = [["Character", "&sung"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: sup without a semi-colon" do + input = "&sup" + output = [["Character", "&sup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supE without a semi-colon" do + input = "&supE" + output = [["Character", "&supE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supdot without a semi-colon" do + input = "&supdot" + output = [["Character", "&supdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supdsub without a semi-colon" do + input = "&supdsub" + output = [["Character", "&supdsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supe without a semi-colon" do + input = "&supe" + output = [["Character", "&supe"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supedot without a semi-colon" do + input = "&supedot" + output = [["Character", "&supedot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: suphsol without a semi-colon" do + input = "&suphsol" + output = [["Character", "&suphsol"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: suphsub without a semi-colon" do + input = "&suphsub" + output = [["Character", "&suphsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: suplarr without a semi-colon" do + input = "&suplarr" + output = [["Character", "&suplarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supmult without a semi-colon" do + input = "&supmult" + output = [["Character", "&supmult"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supnE without a semi-colon" do + input = "&supnE" + output = [["Character", "&supnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supne without a semi-colon" do + input = "&supne" + output = [["Character", "&supne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supplus without a semi-colon" do + input = "&supplus" + output = [["Character", "&supplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supset without a semi-colon" do + input = "&supset" + output = [["Character", "&supset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supseteq without a semi-colon" do + input = "&supseteq" + output = [["Character", "&supseteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supseteqq without a semi-colon" do + input = "&supseteqq" + output = [["Character", "&supseteqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supsetneq without a semi-colon" do + input = "&supsetneq" + output = [["Character", "&supsetneq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supsetneqq without a semi-colon" do + input = "&supsetneqq" + output = [["Character", "&supsetneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == 
output + end + + test "tokenize/1 Bad named entity: supsim without a semi-colon" do + input = "&supsim" + output = [["Character", "&supsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supsub without a semi-colon" do + input = "&supsub" + output = [["Character", "&supsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: supsup without a semi-colon" do + input = "&supsup" + output = [["Character", "&supsup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: swArr without a semi-colon" do + input = "&swArr" + output = [["Character", "&swArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: swarhk without a semi-colon" do + input = "&swarhk" + output = [["Character", "&swarhk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: swarr without a semi-colon" do + input = "&swarr" + output = [["Character", "&swarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: swarrow without a semi-colon" do + input = "&swarrow" + output = [["Character", "&swarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: swnwar without a semi-colon" do + input = "&swnwar" + output = [["Character", "&swnwar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: target without a semi-colon" do + input = "&target" + output = [["Character", "&target"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tau without a semi-colon" do + input = "&tau" + output = [["Character", "&tau"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tbrk without a semi-colon" do + input = "&tbrk" + output = [["Character", "&tbrk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tcaron without a semi-colon" do + input = "&tcaron" + output = [["Character", "&tcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tcedil without a semi-colon" do + input = "&tcedil" + output = [["Character", "&tcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tcy without a semi-colon" do + input = "&tcy" + output = [["Character", "&tcy"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tdot without a semi-colon" do + input = "&tdot" + output = [["Character", "&tdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: telrec without a semi-colon" do + input = "&telrec" + output = [["Character", "&telrec"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tfr without a semi-colon" do + input = "&tfr" + output = [["Character", "&tfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: there4 without a semi-colon" do + input = "&there4" + output = [["Character", "&there4"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: therefore without a semi-colon" do + input = "&therefore" + output = [["Character", "&therefore"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: theta without a semi-colon" do + input = "&theta" + output = [["Character", "&theta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part19_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part19_test.exs new file mode 100644 index 00000000..31a4a9d9 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part19_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart19Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: thetasym without a semi-colon" do + input = "&thetasym" + output = [["Character", "&thetasym"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: thetav without a semi-colon" do + input = "&thetav" + output = [["Character", "&thetav"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: thickapprox without a semi-colon" do + input = "&thickapprox" + output = [["Character", "&thickapprox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: thicksim without a semi-colon" do + input = "&thicksim" + output = [["Character", "&thicksim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: thinsp without a semi-colon" do + input = "&thinsp" + output = [["Character", "&thinsp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: thkap without a semi-colon" do + input = "&thkap" + output = [["Character", "&thkap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: thksim without a semi-colon" do + input = "&thksim" + output = [["Character", "&thksim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tilde without a semi-colon" do + input = "&tilde" + output = [["Character", "&tilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tint without a semi-colon" do + input = "&tint" + output = [["Character", "&tint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: toea without a semi-colon" do + input = "&toea" + output = [["Character", "&toea"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: top without a semi-colon" do + input = "&top" + output = [["Character", "&top"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: topbot without a semi-colon" do + input = "&topbot" + output = [["Character", "&topbot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: topcir without a semi-colon" do + input = "&topcir" + output = [["Character", "&topcir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad 
named entity: topf without a semi-colon" do + input = "&topf" + output = [["Character", "&topf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: topfork without a semi-colon" do + input = "&topfork" + output = [["Character", "&topfork"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tosa without a semi-colon" do + input = "&tosa" + output = [["Character", "&tosa"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tprime without a semi-colon" do + input = "&tprime" + output = [["Character", "&tprime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: trade without a semi-colon" do + input = "&trade" + output = [["Character", "&trade"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: triangle without a semi-colon" do + input = "&triangle" + output = [["Character", "&triangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: triangledown without a semi-colon" do + input = "&triangledown" + output = [["Character", "&triangledown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: triangleleft without a semi-colon" do + input = "&triangleleft" + output = [["Character", "&triangleleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: trianglelefteq without a semi-colon" do + input = "&trianglelefteq" + output = [["Character", "&trianglelefteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: triangleq without a semi-colon" do + input = "&triangleq" + output = [["Character", "&triangleq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: triangleright without a semi-colon" do + input = "&triangleright" + output = [["Character", "&triangleright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: trianglerighteq without a semi-colon" do + input = "&trianglerighteq" + output = [["Character", "&trianglerighteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tridot without a semi-colon" do + input = "&tridot" + output = [["Character", "&tridot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named 
entity: trie without a semi-colon" do + input = "&trie" + output = [["Character", "&trie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: triminus without a semi-colon" do + input = "&triminus" + output = [["Character", "&triminus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: triplus without a semi-colon" do + input = "&triplus" + output = [["Character", "&triplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: trisb without a semi-colon" do + input = "&trisb" + output = [["Character", "&trisb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tritime without a semi-colon" do + input = "&tritime" + output = [["Character", "&tritime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: trpezium without a semi-colon" do + input = "&trpezium" + output = [["Character", "&trpezium"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tscr without a semi-colon" do + input = "&tscr" + output = [["Character", "&tscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tscy without a semi-colon" do + input = "&tscy" + output = [["Character", "&tscy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tshcy without a semi-colon" do + input = "&tshcy" + output = [["Character", "&tshcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: tstrok without a semi-colon" do + input = "&tstrok" + output = [["Character", "&tstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: twixt without a semi-colon" do + input = "&twixt" + output = [["Character", "&twixt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: twoheadleftarrow without a semi-colon" do + input = "&twoheadleftarrow" + output = [["Character", "&twoheadleftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: twoheadrightarrow without a semi-colon" do + input = "&twoheadrightarrow" + output = [["Character", "&twoheadrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uArr without a semi-colon" do + input = "&uArr" + output = 
[["Character", "&uArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uHar without a semi-colon" do + input = "&uHar" + output = [["Character", "&uHar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uarr without a semi-colon" do + input = "&uarr" + output = [["Character", "&uarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ubrcy without a semi-colon" do + input = "&ubrcy" + output = [["Character", "&ubrcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ubreve without a semi-colon" do + input = "&ubreve" + output = [["Character", "&ubreve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ucy without a semi-colon" do + input = "&ucy" + output = [["Character", "&ucy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: udarr without a semi-colon" do + input = "&udarr" + output = [["Character", "&udarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: udblac without a semi-colon" do + input = "&udblac" + output = [["Character", "&udblac"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: udhar without a semi-colon" do + input = "&udhar" + output = [["Character", "&udhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ufisht without a semi-colon" do + input = "&ufisht" + output = [["Character", "&ufisht"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ufr without a semi-colon" do + input = "&ufr" + output = [["Character", "&ufr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uharl without a semi-colon" do + input = "&uharl" + output = [["Character", "&uharl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uharr without a semi-colon" do + input = "&uharr" + output = [["Character", "&uharr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uhblk without a semi-colon" do + input = "&uhblk" + output = [["Character", "&uhblk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Bad named entity: ulcorn without a semi-colon" do + input = "&ulcorn" + output = [["Character", "&ulcorn"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ulcorner without a semi-colon" do + input = "&ulcorner" + output = [["Character", "&ulcorner"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ulcrop without a semi-colon" do + input = "&ulcrop" + output = [["Character", "&ulcrop"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ultri without a semi-colon" do + input = "&ultri" + output = [["Character", "&ultri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: umacr without a semi-colon" do + input = "&umacr" + output = [["Character", "&umacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uogon without a semi-colon" do + input = "&uogon" + output = [["Character", "&uogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uopf without a semi-colon" do + input = "&uopf" + output = [["Character", "&uopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uparrow without a semi-colon" do + input = "&uparrow" + output = [["Character", "&uparrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: updownarrow without a semi-colon" do + input = "&updownarrow" + output = [["Character", "&updownarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: upharpoonleft without a semi-colon" do + input = "&upharpoonleft" + output = [["Character", "&upharpoonleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: upharpoonright without a semi-colon" do + input = "&upharpoonright" + output = [["Character", "&upharpoonright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uplus without a semi-colon" do + input = "&uplus" + output = [["Character", "&uplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: upsi without a semi-colon" do + input = "&upsi" + output = [["Character", "&upsi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: upsih without a semi-colon" do + input = 
"&upsih" + output = [["Character", "&upsih"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: upsilon without a semi-colon" do + input = "&upsilon" + output = [["Character", "&upsilon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: upuparrows without a semi-colon" do + input = "&upuparrows" + output = [["Character", "&upuparrows"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: urcorn without a semi-colon" do + input = "&urcorn" + output = [["Character", "&urcorn"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: urcorner without a semi-colon" do + input = "&urcorner" + output = [["Character", "&urcorner"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: urcrop without a semi-colon" do + input = "&urcrop" + output = [["Character", "&urcrop"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uring without a semi-colon" do + input = "&uring" + output = [["Character", "&uring"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: urtri without a semi-colon" do + input = "&urtri" + output = [["Character", "&urtri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uscr without a semi-colon" do + input = "&uscr" + output = [["Character", "&uscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: utdot without a semi-colon" do + input = "&utdot" + output = [["Character", "&utdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: utilde without a semi-colon" do + input = "&utilde" + output = [["Character", "&utilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: utri without a semi-colon" do + input = "&utri" + output = [["Character", "&utri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: utrif without a semi-colon" do + input = "&utrif" + output = [["Character", "&utrif"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uuarr without a semi-colon" do + input = "&uuarr" + output = [["Character", "&uuarr"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: uwangle without a semi-colon" do + input = "&uwangle" + output = [["Character", "&uwangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vArr without a semi-colon" do + input = "&vArr" + output = [["Character", "&vArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vBar without a semi-colon" do + input = "&vBar" + output = [["Character", "&vBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vBarv without a semi-colon" do + input = "&vBarv" + output = [["Character", "&vBarv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vDash without a semi-colon" do + input = "&vDash" + output = [["Character", "&vDash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vangrt without a semi-colon" do + input = "&vangrt" + output = [["Character", "&vangrt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varepsilon without a semi-colon" do + input = "&varepsilon" + output = [["Character", "&varepsilon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varkappa without a semi-colon" do + input = "&varkappa" + output = [["Character", "&varkappa"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varnothing without a semi-colon" do + input = "&varnothing" + output = [["Character", "&varnothing"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varphi without a semi-colon" do + input = "&varphi" + output = [["Character", "&varphi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varpi without a semi-colon" do + input = "&varpi" + output = [["Character", "&varpi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varpropto without a semi-colon" do + input = "&varpropto" + output = [["Character", "&varpropto"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varr without a semi-colon" do + input = "&varr" + output = [["Character", "&varr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad 
named entity: varrho without a semi-colon" do + input = "&varrho" + output = [["Character", "&varrho"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varsigma without a semi-colon" do + input = "&varsigma" + output = [["Character", "&varsigma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varsubsetneq without a semi-colon" do + input = "&varsubsetneq" + output = [["Character", "&varsubsetneq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varsubsetneqq without a semi-colon" do + input = "&varsubsetneqq" + output = [["Character", "&varsubsetneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varsupsetneq without a semi-colon" do + input = "&varsupsetneq" + output = [["Character", "&varsupsetneq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: varsupsetneqq without a semi-colon" do + input = "&varsupsetneqq" + output = [["Character", "&varsupsetneqq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vartheta without a semi-colon" do + input = "&vartheta" + output = [["Character", "&vartheta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part1_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part1_test.exs new file mode 100644 index 00000000..feba9cc9 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part1_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart1Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: Abreve without a semi-colon" do + input = "&Abreve" + output = [["Character", "&Abreve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Acy without a semi-colon" do + input = "&Acy" + output = [["Character", "&Acy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Afr without a semi-colon" do + input = "&Afr" + output = [["Character", "&Afr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Alpha without a semi-colon" do + input = "&Alpha" + output = [["Character", "&Alpha"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Amacr without a semi-colon" do + input = "&Amacr" + output = [["Character", "&Amacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: And without a semi-colon" do + input = "&And" + output = [["Character", "&And"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Aogon without a semi-colon" do + input = "&Aogon" + output = [["Character", "&Aogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Aopf without a semi-colon" do + input = "&Aopf" + output = [["Character", "&Aopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ApplyFunction without a semi-colon" do + input = "&ApplyFunction" + output = [["Character", "&ApplyFunction"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ascr without a semi-colon" do + input = "&Ascr" + output = [["Character", "&Ascr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Assign without a semi-colon" do + input = "&Assign" + output = [["Character", "&Assign"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Backslash without a semi-colon" do + input = "&Backslash" + output = [["Character", "&Backslash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Barv without a semi-colon" do + input = "&Barv" + output = [["Character", "&Barv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Barwed 
without a semi-colon" do + input = "&Barwed" + output = [["Character", "&Barwed"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Bcy without a semi-colon" do + input = "&Bcy" + output = [["Character", "&Bcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Because without a semi-colon" do + input = "&Because" + output = [["Character", "&Because"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Bernoullis without a semi-colon" do + input = "&Bernoullis" + output = [["Character", "&Bernoullis"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Beta without a semi-colon" do + input = "&Beta" + output = [["Character", "&Beta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Bfr without a semi-colon" do + input = "&Bfr" + output = [["Character", "&Bfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Bopf without a semi-colon" do + input = "&Bopf" + output = [["Character", "&Bopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Breve without a semi-colon" do + input = "&Breve" + output = [["Character", "&Breve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Bscr without a semi-colon" do + input = "&Bscr" + output = [["Character", "&Bscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Bumpeq without a semi-colon" do + input = "&Bumpeq" + output = [["Character", "&Bumpeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CHcy without a semi-colon" do + input = "&CHcy" + output = [["Character", "&CHcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cacute without a semi-colon" do + input = "&Cacute" + output = [["Character", "&Cacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cap without a semi-colon" do + input = "&Cap" + output = [["Character", "&Cap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CapitalDifferentialD without a semi-colon" do + input = "&CapitalDifferentialD" + output = [["Character", "&CapitalDifferentialD"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cayleys without a semi-colon" do + input = "&Cayleys" + output = [["Character", "&Cayleys"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ccaron without a semi-colon" do + input = "&Ccaron" + output = [["Character", "&Ccaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ccirc without a semi-colon" do + input = "&Ccirc" + output = [["Character", "&Ccirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cconint without a semi-colon" do + input = "&Cconint" + output = [["Character", "&Cconint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cdot without a semi-colon" do + input = "&Cdot" + output = [["Character", "&Cdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cedilla without a semi-colon" do + input = "&Cedilla" + output = [["Character", "&Cedilla"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CenterDot without a semi-colon" do + input = "&CenterDot" + output = [["Character", "&CenterDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cfr without a semi-colon" do + input = "&Cfr" + output = [["Character", "&Cfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Chi without a semi-colon" do + input = "&Chi" + output = [["Character", "&Chi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CircleDot without a semi-colon" do + input = "&CircleDot" + output = [["Character", "&CircleDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CircleMinus without a semi-colon" do + input = "&CircleMinus" + output = [["Character", "&CircleMinus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CirclePlus without a semi-colon" do + input = "&CirclePlus" + output = [["Character", "&CirclePlus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CircleTimes without a semi-colon" do + input = "&CircleTimes" + output = [["Character", "&CircleTimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Bad named entity: ClockwiseContourIntegral without a semi-colon" do + input = "&ClockwiseContourIntegral" + output = [["Character", "&ClockwiseContourIntegral"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CloseCurlyDoubleQuote without a semi-colon" do + input = "&CloseCurlyDoubleQuote" + output = [["Character", "&CloseCurlyDoubleQuote"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CloseCurlyQuote without a semi-colon" do + input = "&CloseCurlyQuote" + output = [["Character", "&CloseCurlyQuote"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Colon without a semi-colon" do + input = "&Colon" + output = [["Character", "&Colon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Colone without a semi-colon" do + input = "&Colone" + output = [["Character", "&Colone"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Congruent without a semi-colon" do + input = "&Congruent" + output = [["Character", "&Congruent"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Conint without a semi-colon" do + input = "&Conint" + output = [["Character", "&Conint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ContourIntegral without a semi-colon" do + input = "&ContourIntegral" + output = [["Character", "&ContourIntegral"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Copf without a semi-colon" do + input = "&Copf" + output = [["Character", "&Copf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Coproduct without a semi-colon" do + input = "&Coproduct" + output = [["Character", "&Coproduct"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CounterClockwiseContourIntegral without a semi-colon" do + input = "&CounterClockwiseContourIntegral" + output = [["Character", "&CounterClockwiseContourIntegral"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cross without a semi-colon" do + input = "&Cross" + output = [["Character", "&Cross"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cscr without a semi-colon" do + input = "&Cscr" + output = [["Character", "&Cscr"]] 
+ + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Cup without a semi-colon" do + input = "&Cup" + output = [["Character", "&Cup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: CupCap without a semi-colon" do + input = "&CupCap" + output = [["Character", "&CupCap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DD without a semi-colon" do + input = "&DD" + output = [["Character", "&DD"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DDotrahd without a semi-colon" do + input = "&DDotrahd" + output = [["Character", "&DDotrahd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DJcy without a semi-colon" do + input = "&DJcy" + output = [["Character", "&DJcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DScy without a semi-colon" do + input = "&DScy" + output = [["Character", "&DScy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DZcy without a semi-colon" do + input = "&DZcy" + output = [["Character", "&DZcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dagger without a semi-colon" do + input = "&Dagger" + output = [["Character", "&Dagger"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Darr without a semi-colon" do + input = "&Darr" + output = [["Character", "&Darr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dashv without a semi-colon" do + input = "&Dashv" + output = [["Character", "&Dashv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dcaron without a semi-colon" do + input = "&Dcaron" + output = [["Character", "&Dcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dcy without a semi-colon" do + input = "&Dcy" + output = [["Character", "&Dcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Del without a semi-colon" do + input = "&Del" + output = [["Character", "&Del"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Delta 
without a semi-colon" do + input = "&Delta" + output = [["Character", "&Delta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dfr without a semi-colon" do + input = "&Dfr" + output = [["Character", "&Dfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DiacriticalAcute without a semi-colon" do + input = "&DiacriticalAcute" + output = [["Character", "&DiacriticalAcute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DiacriticalDot without a semi-colon" do + input = "&DiacriticalDot" + output = [["Character", "&DiacriticalDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DiacriticalDoubleAcute without a semi-colon" do + input = "&DiacriticalDoubleAcute" + output = [["Character", "&DiacriticalDoubleAcute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DiacriticalGrave without a semi-colon" do + input = "&DiacriticalGrave" + output = [["Character", "&DiacriticalGrave"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DiacriticalTilde without a semi-colon" do + input = "&DiacriticalTilde" + output = [["Character", "&DiacriticalTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Diamond without a semi-colon" do + input = "&Diamond" + output = [["Character", "&Diamond"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DifferentialD without a semi-colon" do + input = "&DifferentialD" + output = [["Character", "&DifferentialD"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dopf without a semi-colon" do + input = "&Dopf" + output = [["Character", "&Dopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dot without a semi-colon" do + input = "&Dot" + output = [["Character", "&Dot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DotDot without a semi-colon" do + input = "&DotDot" + output = [["Character", "&DotDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DotEqual without a semi-colon" do + input = "&DotEqual" + output = [["Character", "&DotEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + 
+ test "tokenize/1 Bad named entity: DoubleContourIntegral without a semi-colon" do + input = "&DoubleContourIntegral" + output = [["Character", "&DoubleContourIntegral"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleDot without a semi-colon" do + input = "&DoubleDot" + output = [["Character", "&DoubleDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleDownArrow without a semi-colon" do + input = "&DoubleDownArrow" + output = [["Character", "&DoubleDownArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleLeftArrow without a semi-colon" do + input = "&DoubleLeftArrow" + output = [["Character", "&DoubleLeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleLeftRightArrow without a semi-colon" do + input = "&DoubleLeftRightArrow" + output = [["Character", "&DoubleLeftRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleLeftTee without a semi-colon" do + input = "&DoubleLeftTee" + output = [["Character", "&DoubleLeftTee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleLongLeftArrow without a semi-colon" do + input = "&DoubleLongLeftArrow" + output = [["Character", "&DoubleLongLeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleLongLeftRightArrow without a semi-colon" do + input = "&DoubleLongLeftRightArrow" + output = [["Character", "&DoubleLongLeftRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleLongRightArrow without a semi-colon" do + input = "&DoubleLongRightArrow" + output = [["Character", "&DoubleLongRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleRightArrow without a semi-colon" do + input = "&DoubleRightArrow" + output = [["Character", "&DoubleRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleRightTee without a semi-colon" do + input = "&DoubleRightTee" + output = [["Character", "&DoubleRightTee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleUpArrow without a semi-colon" do + input = "&DoubleUpArrow" + output = [["Character", "&DoubleUpArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + 
test "tokenize/1 Bad named entity: DoubleUpDownArrow without a semi-colon" do + input = "&DoubleUpDownArrow" + output = [["Character", "&DoubleUpDownArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DoubleVerticalBar without a semi-colon" do + input = "&DoubleVerticalBar" + output = [["Character", "&DoubleVerticalBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownArrow without a semi-colon" do + input = "&DownArrow" + output = [["Character", "&DownArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownArrowBar without a semi-colon" do + input = "&DownArrowBar" + output = [["Character", "&DownArrowBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownArrowUpArrow without a semi-colon" do + input = "&DownArrowUpArrow" + output = [["Character", "&DownArrowUpArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownBreve without a semi-colon" do + input = "&DownBreve" + output = [["Character", "&DownBreve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownLeftRightVector without a semi-colon" do + input = "&DownLeftRightVector" + output = [["Character", "&DownLeftRightVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownLeftTeeVector without a semi-colon" do + input = "&DownLeftTeeVector" + output = [["Character", "&DownLeftTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownLeftVector without a semi-colon" do + input = "&DownLeftVector" + output = [["Character", "&DownLeftVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part20_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part20_test.exs new file mode 100644 index 00000000..6813410a --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part20_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart20Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: vartriangleleft without a semi-colon" do + input = "&vartriangleleft" + output = [["Character", "&vartriangleleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vartriangleright without a semi-colon" do + input = "&vartriangleright" + output = [["Character", "&vartriangleright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vcy without a semi-colon" do + input = "&vcy" + output = [["Character", "&vcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vdash without a semi-colon" do + input = "&vdash" + output = [["Character", "&vdash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vee without a semi-colon" do + input = "&vee" + output = [["Character", "&vee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: veebar without a semi-colon" do + input = "&veebar" + output = [["Character", "&veebar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: veeeq without a semi-colon" do + input = "&veeeq" + output = [["Character", "&veeeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vellip without a semi-colon" do + input = "&vellip" + output = [["Character", "&vellip"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: verbar without a semi-colon" do + input = "&verbar" + output = [["Character", "&verbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vert without a semi-colon" do + input = "&vert" + output = [["Character", "&vert"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vfr without a semi-colon" do + input = "&vfr" + output = [["Character", "&vfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vltri without a semi-colon" do + input = "&vltri" + output = [["Character", "&vltri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vnsub without a semi-colon" do + input = "&vnsub" + output = [["Character", "&vnsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Bad named entity: vnsup without a semi-colon" do + input = "&vnsup" + output = [["Character", "&vnsup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vopf without a semi-colon" do + input = "&vopf" + output = [["Character", "&vopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vprop without a semi-colon" do + input = "&vprop" + output = [["Character", "&vprop"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vrtri without a semi-colon" do + input = "&vrtri" + output = [["Character", "&vrtri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vscr without a semi-colon" do + input = "&vscr" + output = [["Character", "&vscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vsubnE without a semi-colon" do + input = "&vsubnE" + output = [["Character", "&vsubnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vsubne without a semi-colon" do + input = "&vsubne" + output = [["Character", "&vsubne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vsupnE without a semi-colon" do + input = "&vsupnE" + output = [["Character", "&vsupnE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vsupne without a semi-colon" do + input = "&vsupne" + output = [["Character", "&vsupne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: vzigzag without a semi-colon" do + input = "&vzigzag" + output = [["Character", "&vzigzag"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wcirc without a semi-colon" do + input = "&wcirc" + output = [["Character", "&wcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wedbar without a semi-colon" do + input = "&wedbar" + output = [["Character", "&wedbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wedge without a semi-colon" do + input = "&wedge" + output = [["Character", "&wedge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wedgeq without a semi-colon" do + input = "&wedgeq" + output = [["Character", "&wedgeq"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: weierp without a semi-colon" do + input = "&weierp" + output = [["Character", "&weierp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wfr without a semi-colon" do + input = "&wfr" + output = [["Character", "&wfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wopf without a semi-colon" do + input = "&wopf" + output = [["Character", "&wopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wp without a semi-colon" do + input = "&wp" + output = [["Character", "&wp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wr without a semi-colon" do + input = "&wr" + output = [["Character", "&wr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wreath without a semi-colon" do + input = "&wreath" + output = [["Character", "&wreath"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: wscr without a semi-colon" do + input = "&wscr" + output = [["Character", "&wscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xcap without a semi-colon" do + input = "&xcap" + output = [["Character", "&xcap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xcirc without a semi-colon" do + input = "&xcirc" + output = [["Character", "&xcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xcup without a semi-colon" do + input = "&xcup" + output = [["Character", "&xcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xdtri without a semi-colon" do + input = "&xdtri" + output = [["Character", "&xdtri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xfr without a semi-colon" do + input = "&xfr" + output = [["Character", "&xfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xhArr without a semi-colon" do + input = "&xhArr" + output = [["Character", "&xhArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xharr without a semi-colon" do + input = "&xharr" 
+ output = [["Character", "&xharr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xi without a semi-colon" do + input = "&xi" + output = [["Character", "&xi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xlArr without a semi-colon" do + input = "&xlArr" + output = [["Character", "&xlArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xlarr without a semi-colon" do + input = "&xlarr" + output = [["Character", "&xlarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xmap without a semi-colon" do + input = "&xmap" + output = [["Character", "&xmap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xnis without a semi-colon" do + input = "&xnis" + output = [["Character", "&xnis"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xodot without a semi-colon" do + input = "&xodot" + output = [["Character", "&xodot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xopf without a semi-colon" do + input = "&xopf" + output = [["Character", "&xopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xoplus without a semi-colon" do + input = "&xoplus" + output = [["Character", "&xoplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xotime without a semi-colon" do + input = "&xotime" + output = [["Character", "&xotime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xrArr without a semi-colon" do + input = "&xrArr" + output = [["Character", "&xrArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xrarr without a semi-colon" do + input = "&xrarr" + output = [["Character", "&xrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xscr without a semi-colon" do + input = "&xscr" + output = [["Character", "&xscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xsqcup without a semi-colon" do + input = "&xsqcup" + output = [["Character", "&xsqcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + 
+ test "tokenize/1 Bad named entity: xuplus without a semi-colon" do + input = "&xuplus" + output = [["Character", "&xuplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xutri without a semi-colon" do + input = "&xutri" + output = [["Character", "&xutri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xvee without a semi-colon" do + input = "&xvee" + output = [["Character", "&xvee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: xwedge without a semi-colon" do + input = "&xwedge" + output = [["Character", "&xwedge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: yacy without a semi-colon" do + input = "&yacy" + output = [["Character", "&yacy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ycirc without a semi-colon" do + input = "&ycirc" + output = [["Character", "&ycirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ycy without a semi-colon" do + input = "&ycy" + output = [["Character", "&ycy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: yfr without a semi-colon" do + input = "&yfr" + output = [["Character", "&yfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: yicy without a semi-colon" do + input = "&yicy" + output = [["Character", "&yicy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: yopf without a semi-colon" do + input = "&yopf" + output = [["Character", "&yopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: yscr without a semi-colon" do + input = "&yscr" + output = [["Character", "&yscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: yucy without a semi-colon" do + input = "&yucy" + output = [["Character", "&yucy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zacute without a semi-colon" do + input = "&zacute" + output = [["Character", "&zacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zcaron without a semi-colon" do + input = "&zcaron" + output = [["Character", "&zcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zcy without a semi-colon" do + input = "&zcy" + output = [["Character", "&zcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zdot without a semi-colon" do + input = "&zdot" + output = [["Character", "&zdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zeetrf without a semi-colon" do + input = "&zeetrf" + output = [["Character", "&zeetrf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zeta without a semi-colon" do + input = "&zeta" + output = [["Character", "&zeta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zfr without a semi-colon" do + input = "&zfr" + output = [["Character", "&zfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zhcy without a semi-colon" do + input = "&zhcy" + output = [["Character", "&zhcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zigrarr without a semi-colon" do + input = "&zigrarr" + output = [["Character", "&zigrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zopf without a semi-colon" do + input = "&zopf" + output = [["Character", "&zopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zscr without a semi-colon" do + input = "&zscr" + output = [["Character", "&zscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zwj without a semi-colon" do + input = "&zwj" + output = [["Character", "&zwj"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: zwnj without a semi-colon" do + input = "&zwnj" + output = [["Character", "&zwnj"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: AElig without a semi-colon" do + input = "Æ" + output = [["Character", "Æ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: AElig; with a semi-colon" do + input = "Æ" + output = [["Character", "Æ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: AMP without a semi-colon" do + input = "&" + output = [["Character", "&"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: AMP; with a semi-colon" do + input = "&" + output = [["Character", "&"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Aacute without a semi-colon" do + input = "Á" + output = [["Character", "Á"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Aacute; with a semi-colon" do + input = "Á" + output = [["Character", "Á"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Abreve; with a semi-colon" do + input = "Ă" + output = [["Character", "Ă"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Acirc without a semi-colon" do + input = "Â" + output = [["Character", "Â"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Acirc; with a semi-colon" do + input = "Â" + output = [["Character", "Â"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Acy; with a semi-colon" do + input = "А" + output = [["Character", "А"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Afr; with a semi-colon" do + input = "𝔄" + output = [["Character", "𝔄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Agrave without a semi-colon" do + input = "À" + output = [["Character", "À"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Agrave; with a semi-colon" do + input = "À" + output = [["Character", "À"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Alpha; with a semi-colon" do + input = "Α" + output = [["Character", "Α"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Amacr; with a semi-colon" do + input = "Ā" + output = [["Character", "Ā"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: And; with a semi-colon" do + input = "⩓" + output = [["Character", "⩓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Aogon; with a semi-colon" do + input = "Ą" + output = [["Character", "Ą"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: Aopf; with a semi-colon" do + input = "𝔸" + output = [["Character", "𝔸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ApplyFunction; with a semi-colon" do + input = "⁡" + output = [["Character", "⁡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Aring without a semi-colon" do + input = "Å" + output = [["Character", "Å"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Aring; with a semi-colon" do + input = "Å" + output = [["Character", "Å"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part21_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part21_test.exs new file mode 100644 index 00000000..294d1f2a --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part21_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart21Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: Ascr; with a semi-colon" do + input = "𝒜" + output = [["Character", "𝒜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Assign; with a semi-colon" do + input = "≔" + output = [["Character", "≔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Atilde without a semi-colon" do + input = "Ã" + output = [["Character", "Ã"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Atilde; with a semi-colon" do + input = "Ã" + output = [["Character", "Ã"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Auml without a semi-colon" do + input = "Ä" + output = [["Character", "Ä"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Auml; with a semi-colon" do + input = "Ä" + output = [["Character", "Ä"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Backslash; with a semi-colon" do + input = "∖" + output = [["Character", "∖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Barv; with a semi-colon" do + input = "⫧" + output = [["Character", "⫧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: Barwed; with a semi-colon" do + input = "⌆" + output = [["Character", "⌆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Bcy; with a semi-colon" do + input = "Б" + output = [["Character", "Б"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Because; with a semi-colon" do + input = "∵" + output = [["Character", "∵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Bernoullis; with a semi-colon" do + input = "ℬ" + output = [["Character", "ℬ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Beta; with a semi-colon" do + input = "Β" + output = [["Character", "Β"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Bfr; with a semi-colon" do + input = "𝔅" + output = [["Character", "𝔅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Bopf; with a semi-colon" do + input = "𝔹" + output = [["Character", "𝔹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Breve; with a semi-colon" do + input = "˘" + output = [["Character", "˘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Bscr; with a semi-colon" do + input = "ℬ" + output = [["Character", "ℬ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Bumpeq; with a semi-colon" do + input = "≎" + output = [["Character", "≎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CHcy; with a semi-colon" do + input = "Ч" + output = [["Character", "Ч"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: COPY without a semi-colon" do + input = "©" + output = [["Character", "©"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: COPY; with a semi-colon" do + input = "©" + output = [["Character", "©"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cacute; with a semi-colon" do + input = "Ć" + output = [["Character", "Ć"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cap; with a semi-colon" do + input = "⋒" + output = [["Character", "⋒"]] + + result = + input + 
|> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CapitalDifferentialD; with a semi-colon" do + input = "ⅅ" + output = [["Character", "ⅅ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cayleys; with a semi-colon" do + input = "ℭ" + output = [["Character", "ℭ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ccaron; with a semi-colon" do + input = "Č" + output = [["Character", "Č"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ccedil without a semi-colon" do + input = "Ç" + output = [["Character", "Ç"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ccedil; with a semi-colon" do + input = "Ç" + output = [["Character", "Ç"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ccirc; with a semi-colon" do + input = "Ĉ" + output = [["Character", "Ĉ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cconint; with a semi-colon" do + input = "∰" + output = [["Character", "∰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cdot; with a semi-colon" do + input = "Ċ" + output = [["Character", "Ċ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cedilla; with a semi-colon" do + input = "¸" + output = [["Character", "¸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CenterDot; with a semi-colon" do + input = "·" + output = [["Character", "·"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cfr; with a semi-colon" do + input = "ℭ" + output = [["Character", "ℭ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Chi; with a semi-colon" do + input = "Χ" + output = [["Character", "Χ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CircleDot; with a semi-colon" do + input = "⊙" + output = [["Character", "⊙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CircleMinus; with a semi-colon" do + input = "⊖" + output = [["Character", "⊖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: CirclePlus; with a semi-colon" do + input = "⊕" + output = [["Character", "⊕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CircleTimes; with a semi-colon" do + input = "⊗" + output = [["Character", "⊗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ClockwiseContourIntegral; with a semi-colon" do + input = "∲" + output = [["Character", "∲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CloseCurlyDoubleQuote; with a semi-colon" do + input = "”" + output = [["Character", "”"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CloseCurlyQuote; with a semi-colon" do + input = "’" + output = [["Character", "’"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Colon; with a semi-colon" do + input = "∷" + output = [["Character", "∷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Colone; with a semi-colon" do + input = "⩴" + output = [["Character", "⩴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Congruent; with a semi-colon" do + input = "≡" + output = [["Character", "≡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Conint; with a semi-colon" do + input = "∯" + output = [["Character", "∯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ContourIntegral; with a semi-colon" do + input = "∮" + output = [["Character", "∮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Copf; with a semi-colon" do + input = "ℂ" + output = [["Character", "ℂ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Coproduct; with a semi-colon" do + input = "∐" + output = [["Character", "∐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CounterClockwiseContourIntegral; with a semi-colon" do + input = "∳" + output = [["Character", "∳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cross; with a semi-colon" do + input = "⨯" + output = [["Character", "⨯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output 
+ end + + test "tokenize/1 Named entity: Cscr; with a semi-colon" do + input = "𝒞" + output = [["Character", "𝒞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Cup; with a semi-colon" do + input = "⋓" + output = [["Character", "⋓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: CupCap; with a semi-colon" do + input = "≍" + output = [["Character", "≍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DD; with a semi-colon" do + input = "ⅅ" + output = [["Character", "ⅅ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DDotrahd; with a semi-colon" do + input = "⤑" + output = [["Character", "⤑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DJcy; with a semi-colon" do + input = "Ђ" + output = [["Character", "Ђ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DScy; with a semi-colon" do + input = "Ѕ" + output = [["Character", "Ѕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DZcy; with a semi-colon" do + input = "Џ" + output = [["Character", "Џ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dagger; with a semi-colon" do + input = "‡" + output = [["Character", "‡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Darr; with a semi-colon" do + input = "↡" + output = [["Character", "↡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dashv; with a semi-colon" do + input = "⫤" + output = [["Character", "⫤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dcaron; with a semi-colon" do + input = "Ď" + output = [["Character", "Ď"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dcy; with a semi-colon" do + input = "Д" + output = [["Character", "Д"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Del; with a semi-colon" do + input = "∇" + output = [["Character", "∇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Delta; with a semi-colon" do + input = "Δ" + output = [["Character", "Δ"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dfr; with a semi-colon" do + input = "𝔇" + output = [["Character", "𝔇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DiacriticalAcute; with a semi-colon" do + input = "´" + output = [["Character", "´"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DiacriticalDot; with a semi-colon" do + input = "˙" + output = [["Character", "˙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DiacriticalDoubleAcute; with a semi-colon" do + input = "˝" + output = [["Character", "˝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DiacriticalGrave; with a semi-colon" do + input = "`" + output = [["Character", "`"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DiacriticalTilde; with a semi-colon" do + input = "˜" + output = [["Character", "˜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Diamond; with a semi-colon" do + input = "⋄" + output = [["Character", "⋄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DifferentialD; with a semi-colon" do + input = "ⅆ" + output = [["Character", "ⅆ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dopf; with a semi-colon" do + input = "𝔻" + output = [["Character", "𝔻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dot; with a semi-colon" do + input = "¨" + output = [["Character", "¨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DotDot; with a semi-colon" do + input = "⃜" + output = [["Character", "⃜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DotEqual; with a semi-colon" do + input = "≐" + output = [["Character", "≐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleContourIntegral; with a semi-colon" do + input = "∯" + output = [["Character", "∯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleDot; with a semi-colon" do + input = "¨" + output = [["Character", "¨"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleDownArrow; with a semi-colon" do + input = "⇓" + output = [["Character", "⇓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleLeftArrow; with a semi-colon" do + input = "⇐" + output = [["Character", "⇐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleLeftRightArrow; with a semi-colon" do + input = "⇔" + output = [["Character", "⇔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleLeftTee; with a semi-colon" do + input = "⫤" + output = [["Character", "⫤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleLongLeftArrow; with a semi-colon" do + input = "⟸" + output = [["Character", "⟸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleLongLeftRightArrow; with a semi-colon" do + input = "⟺" + output = [["Character", "⟺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleLongRightArrow; with a semi-colon" do + input = "⟹" + output = [["Character", "⟹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleRightArrow; with a semi-colon" do + input = "⇒" + output = [["Character", "⇒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleRightTee; with a semi-colon" do + input = "⊨" + output = [["Character", "⊨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleUpArrow; with a semi-colon" do + input = "⇑" + output = [["Character", "⇑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleUpDownArrow; with a semi-colon" do + input = "⇕" + output = [["Character", "⇕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DoubleVerticalBar; with a semi-colon" do + input = "∥" + output = [["Character", "∥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownArrow; with a semi-colon" do + input = "↓" + output = [["Character", "↓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownArrowBar; with a semi-colon" do + input = "⤓" + output = [["Character", "⤓"]] + + result = + input + 
|> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownArrowUpArrow; with a semi-colon" do + input = "⇵" + output = [["Character", "⇵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownBreve; with a semi-colon" do + input = "̑" + output = [["Character", "̑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownLeftRightVector; with a semi-colon" do + input = "⥐" + output = [["Character", "⥐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownLeftTeeVector; with a semi-colon" do + input = "⥞" + output = [["Character", "⥞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownLeftVector; with a semi-colon" do + input = "↽" + output = [["Character", "↽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownLeftVectorBar; with a semi-colon" do + input = "⥖" + output = [["Character", "⥖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part22_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part22_test.exs new file mode 100644 index 00000000..2e3fd806 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part22_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart22Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
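+ # The tests below all follow the same pattern: pipe the entity reference
+ # through Floki.HTML.Tokenizer.tokenize/1 and normalize the result with
+ # TokenizerTestLoader.tokenization_result/1, a helper defined elsewhere in
+ # the test suite. As a rough, hypothetical sketch of what that helper is
+ # assumed to do (collect the emitted character tokens into the
+ # html5lib-tests token format), it could look like:
+ #
+ #   defmodule TokenizerTestLoader do
+ #     # Hypothetical sketch only; the state field and token shape
+ #     # ({:character, data}) are assumptions, not the real tokenizer API.
+ #     def tokenization_result(state) do
+ #       tokens =
+ #         for {:character, data} <- Enum.reverse(state.tokens) do
+ #           ["Character", IO.chardata_to_string(data)]
+ #         end
+ #
+ #       %{tokens: tokens}
+ #     end
+ #   end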
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: DownRightTeeVector; with a semi-colon" do + input = "⥟" + output = [["Character", "⥟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownRightVector; with a semi-colon" do + input = "⇁" + output = [["Character", "⇁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownRightVectorBar; with a semi-colon" do + input = "⥗" + output = [["Character", "⥗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownTee; with a semi-colon" do + input = "⊤" + output = [["Character", "⊤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: DownTeeArrow; with a semi-colon" do + input = "↧" + output = [["Character", "↧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Downarrow; with a semi-colon" do + input = "⇓" + output = [["Character", "⇓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dscr; with a semi-colon" do + input = "𝒟" + output = [["Character", "𝒟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Dstrok; with a semi-colon" do + input = "Đ" + output = [["Character", "Đ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ENG; with a semi-colon" do + input = "Ŋ" + output = [["Character", "Ŋ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ETH without a semi-colon" do + input = "Ð" + output = [["Character", "Ð"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ETH; with a semi-colon" do + input = "Ð" + output = [["Character", "Ð"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Eacute without a semi-colon" do + input = "É" + output = [["Character", "É"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Eacute; with a semi-colon" do + input = "É" + output = [["Character", "É"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ecaron; with a semi-colon" do + input = "Ě" + output = [["Character", "Ě"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: Ecirc without a semi-colon" do + input = "Ê" + output = [["Character", "Ê"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ecirc; with a semi-colon" do + input = "Ê" + output = [["Character", "Ê"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ecy; with a semi-colon" do + input = "Э" + output = [["Character", "Э"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Edot; with a semi-colon" do + input = "Ė" + output = [["Character", "Ė"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Efr; with a semi-colon" do + input = "𝔈" + output = [["Character", "𝔈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Egrave without a semi-colon" do + input = "È" + output = [["Character", "È"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Egrave; with a semi-colon" do + input = "È" + output = [["Character", "È"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Element; with a semi-colon" do + input = "∈" + output = [["Character", "∈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Emacr; with a semi-colon" do + input = "Ē" + output = [["Character", "Ē"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: EmptySmallSquare; with a semi-colon" do + input = "◻" + output = [["Character", "◻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: EmptyVerySmallSquare; with a semi-colon" do + input = "▫" + output = [["Character", "▫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Eogon; with a semi-colon" do + input = "Ę" + output = [["Character", "Ę"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Eopf; with a semi-colon" do + input = "𝔼" + output = [["Character", "𝔼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Epsilon; with a semi-colon" do + input = "Ε" + output = [["Character", "Ε"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Equal; with a semi-colon" do 
+ input = "⩵" + output = [["Character", "⩵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: EqualTilde; with a semi-colon" do + input = "≂" + output = [["Character", "≂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Equilibrium; with a semi-colon" do + input = "⇌" + output = [["Character", "⇌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Escr; with a semi-colon" do + input = "ℰ" + output = [["Character", "ℰ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Esim; with a semi-colon" do + input = "⩳" + output = [["Character", "⩳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Eta; with a semi-colon" do + input = "Η" + output = [["Character", "Η"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Euml without a semi-colon" do + input = "Ë" + output = [["Character", "Ë"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Euml; with a semi-colon" do + input = "Ë" + output = [["Character", "Ë"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Exists; with a semi-colon" do + input = "∃" + output = [["Character", "∃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ExponentialE; with a semi-colon" do + input = "ⅇ" + output = [["Character", "ⅇ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Fcy; with a semi-colon" do + input = "Ф" + output = [["Character", "Ф"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ffr; with a semi-colon" do + input = "𝔉" + output = [["Character", "𝔉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: FilledSmallSquare; with a semi-colon" do + input = "◼" + output = [["Character", "◼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: FilledVerySmallSquare; with a semi-colon" do + input = "▪" + output = [["Character", "▪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Fopf; with a semi-colon" do + input = "𝔽" + output = [["Character", "𝔽"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ForAll; with a semi-colon" do + input = "∀" + output = [["Character", "∀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Fouriertrf; with a semi-colon" do + input = "ℱ" + output = [["Character", "ℱ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Fscr; with a semi-colon" do + input = "ℱ" + output = [["Character", "ℱ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GJcy; with a semi-colon" do + input = "Ѓ" + output = [["Character", "Ѓ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GT without a semi-colon" do + input = ">" + output = [["Character", ">"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GT; with a semi-colon" do + input = ">" + output = [["Character", ">"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gamma; with a semi-colon" do + input = "Γ" + output = [["Character", "Γ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gammad; with a semi-colon" do + input = "Ϝ" + output = [["Character", "Ϝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gbreve; with a semi-colon" do + input = "Ğ" + output = [["Character", "Ğ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gcedil; with a semi-colon" do + input = "Ģ" + output = [["Character", "Ģ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gcirc; with a semi-colon" do + input = "Ĝ" + output = [["Character", "Ĝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gcy; with a semi-colon" do + input = "Г" + output = [["Character", "Г"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gdot; with a semi-colon" do + input = "Ġ" + output = [["Character", "Ġ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gfr; with a semi-colon" do + input = "𝔊" + output = [["Character", "𝔊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: Gg; with a semi-colon" do + input = "⋙" + output = [["Character", "⋙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gopf; with a semi-colon" do + input = "𝔾" + output = [["Character", "𝔾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GreaterEqual; with a semi-colon" do + input = "≥" + output = [["Character", "≥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GreaterEqualLess; with a semi-colon" do + input = "⋛" + output = [["Character", "⋛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GreaterFullEqual; with a semi-colon" do + input = "≧" + output = [["Character", "≧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GreaterGreater; with a semi-colon" do + input = "⪢" + output = [["Character", "⪢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GreaterLess; with a semi-colon" do + input = "≷" + output = [["Character", "≷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GreaterSlantEqual; with a semi-colon" do + input = "⩾" + output = [["Character", "⩾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: GreaterTilde; with a semi-colon" do + input = "≳" + output = [["Character", "≳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gscr; with a semi-colon" do + input = "𝒢" + output = [["Character", "𝒢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Gt; with a semi-colon" do + input = "≫" + output = [["Character", "≫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: HARDcy; with a semi-colon" do + input = "Ъ" + output = [["Character", "Ъ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Hacek; with a semi-colon" do + input = "ˇ" + output = [["Character", "ˇ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Hat; with a semi-colon" do + input = "^" + output = [["Character", "^"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Hcirc; with a semi-colon" do + input = "Ĥ" + 
output = [["Character", "Ĥ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Hfr; with a semi-colon" do + input = "ℌ" + output = [["Character", "ℌ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: HilbertSpace; with a semi-colon" do + input = "ℋ" + output = [["Character", "ℋ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Hopf; with a semi-colon" do + input = "ℍ" + output = [["Character", "ℍ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: HorizontalLine; with a semi-colon" do + input = "─" + output = [["Character", "─"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Hscr; with a semi-colon" do + input = "ℋ" + output = [["Character", "ℋ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Hstrok; with a semi-colon" do + input = "Ħ" + output = [["Character", "Ħ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: HumpDownHump; with a semi-colon" do + input = "≎" + output = [["Character", "≎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: HumpEqual; with a semi-colon" do + input = "≏" + output = [["Character", "≏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: IEcy; with a semi-colon" do + input = "Е" + output = [["Character", "Е"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: IJlig; with a semi-colon" do + input = "IJ" + output = [["Character", "IJ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: IOcy; with a semi-colon" do + input = "Ё" + output = [["Character", "Ё"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iacute without a semi-colon" do + input = "Í" + output = [["Character", "Í"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iacute; with a semi-colon" do + input = "Í" + output = [["Character", "Í"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Icirc without a semi-colon" do + input = "Î" + output = [["Character", "Î"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Icirc; with a semi-colon" do + input = "Î" + output = [["Character", "Î"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Icy; with a semi-colon" do + input = "И" + output = [["Character", "И"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Idot; with a semi-colon" do + input = "İ" + output = [["Character", "İ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ifr; with a semi-colon" do + input = "ℑ" + output = [["Character", "ℑ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Igrave without a semi-colon" do + input = "Ì" + output = [["Character", "Ì"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Igrave; with a semi-colon" do + input = "Ì" + output = [["Character", "Ì"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Im; with a semi-colon" do + input = "ℑ" + output = [["Character", "ℑ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Imacr; with a semi-colon" do + input = "Ī" + output = [["Character", "Ī"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ImaginaryI; with a semi-colon" do + input = "ⅈ" + output = [["Character", "ⅈ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Implies; with a semi-colon" do + input = "⇒" + output = [["Character", "⇒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Int; with a semi-colon" do + input = "∬" + output = [["Character", "∬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Integral; with a semi-colon" do + input = "∫" + output = [["Character", "∫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Intersection; with a semi-colon" do + input = "⋂" + output = [["Character", "⋂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: InvisibleComma; with a semi-colon" do + input = "⁣" + output = [["Character", "⁣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git 
a/test/floki/html/generated/tokenizer/namedEntities_part23_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part23_test.exs new file mode 100644 index 00000000..028b1467 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part23_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart23Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: InvisibleTimes; with a semi-colon" do + input = "⁢" + output = [["Character", "⁢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iogon; with a semi-colon" do + input = "Į" + output = [["Character", "Į"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iopf; with a semi-colon" do + input = "𝕀" + output = [["Character", "𝕀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iota; with a semi-colon" do + input = "Ι" + output = [["Character", "Ι"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iscr; with a semi-colon" do + input = "ℐ" + output = [["Character", "ℐ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Itilde; with a semi-colon" do + input = "Ĩ" + output = [["Character", "Ĩ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iukcy; with a semi-colon" do + input = "І" + output = [["Character", "І"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iuml without a semi-colon" do + input = "Ï" + output = [["Character", "Ï"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Iuml; with a semi-colon" do + input = "Ï" + output = [["Character", "Ï"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Jcirc; with a semi-colon" do + input = "Ĵ" + output = [["Character", "Ĵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Jcy; with a semi-colon" do + input = "Й" + output = [["Character", "Й"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Jfr; with a semi-colon" do + input = "𝔍" + output = [["Character", "𝔍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: 
Jopf; with a semi-colon" do + input = "𝕁" + output = [["Character", "𝕁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Jscr; with a semi-colon" do + input = "𝒥" + output = [["Character", "𝒥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Jsercy; with a semi-colon" do + input = "Ј" + output = [["Character", "Ј"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Jukcy; with a semi-colon" do + input = "Є" + output = [["Character", "Є"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: KHcy; with a semi-colon" do + input = "Х" + output = [["Character", "Х"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: KJcy; with a semi-colon" do + input = "Ќ" + output = [["Character", "Ќ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Kappa; with a semi-colon" do + input = "Κ" + output = [["Character", "Κ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Kcedil; with a semi-colon" do + input = "Ķ" + output = [["Character", "Ķ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Kcy; with a semi-colon" do + input = "К" + output = [["Character", "К"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Kfr; with a semi-colon" do + input = "𝔎" + output = [["Character", "𝔎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Kopf; with a semi-colon" do + input = "𝕂" + output = [["Character", "𝕂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Kscr; with a semi-colon" do + input = "𝒦" + output = [["Character", "𝒦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LJcy; with a semi-colon" do + input = "Љ" + output = [["Character", "Љ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LT without a semi-colon" do + input = "<" + output = [["Character", "<"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LT; with a semi-colon" do + input = "<" + output = [["Character", "<"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lacute; with a semi-colon" do + input = "Ĺ" + output = [["Character", "Ĺ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lambda; with a semi-colon" do + input = "Λ" + output = [["Character", "Λ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lang; with a semi-colon" do + input = "⟪" + output = [["Character", "⟪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Laplacetrf; with a semi-colon" do + input = "ℒ" + output = [["Character", "ℒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Larr; with a semi-colon" do + input = "↞" + output = [["Character", "↞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lcaron; with a semi-colon" do + input = "Ľ" + output = [["Character", "Ľ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lcedil; with a semi-colon" do + input = "Ļ" + output = [["Character", "Ļ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lcy; with a semi-colon" do + input = "Л" + output = [["Character", "Л"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftAngleBracket; with a semi-colon" do + input = "⟨" + output = [["Character", "⟨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftArrow; with a semi-colon" do + input = "←" + output = [["Character", "←"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftArrowBar; with a semi-colon" do + input = "⇤" + output = [["Character", "⇤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftArrowRightArrow; with a semi-colon" do + input = "⇆" + output = [["Character", "⇆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftCeiling; with a semi-colon" do + input = "⌈" + output = [["Character", "⌈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftDoubleBracket; with a semi-colon" do + input = "⟦" + output = [["Character", "⟦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: LeftDownTeeVector; with a semi-colon" do + input = "⥡" + output = [["Character", "⥡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftDownVector; with a semi-colon" do + input = "⇃" + output = [["Character", "⇃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftDownVectorBar; with a semi-colon" do + input = "⥙" + output = [["Character", "⥙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftFloor; with a semi-colon" do + input = "⌊" + output = [["Character", "⌊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftRightArrow; with a semi-colon" do + input = "↔" + output = [["Character", "↔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftRightVector; with a semi-colon" do + input = "⥎" + output = [["Character", "⥎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftTee; with a semi-colon" do + input = "⊣" + output = [["Character", "⊣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftTeeArrow; with a semi-colon" do + input = "↤" + output = [["Character", "↤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftTeeVector; with a semi-colon" do + input = "⥚" + output = [["Character", "⥚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftTriangle; with a semi-colon" do + input = "⊲" + output = [["Character", "⊲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftTriangleBar; with a semi-colon" do + input = "⧏" + output = [["Character", "⧏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftTriangleEqual; with a semi-colon" do + input = "⊴" + output = [["Character", "⊴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftUpDownVector; with a semi-colon" do + input = "⥑" + output = [["Character", "⥑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftUpTeeVector; with a semi-colon" do + input = "⥠" + output = [["Character", "⥠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: LeftUpVector; with a semi-colon" do + input = "↿" + output = [["Character", "↿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftUpVectorBar; with a semi-colon" do + input = "⥘" + output = [["Character", "⥘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftVector; with a semi-colon" do + input = "↼" + output = [["Character", "↼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LeftVectorBar; with a semi-colon" do + input = "⥒" + output = [["Character", "⥒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Leftarrow; with a semi-colon" do + input = "⇐" + output = [["Character", "⇐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Leftrightarrow; with a semi-colon" do + input = "⇔" + output = [["Character", "⇔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LessEqualGreater; with a semi-colon" do + input = "⋚" + output = [["Character", "⋚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LessFullEqual; with a semi-colon" do + input = "≦" + output = [["Character", "≦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LessGreater; with a semi-colon" do + input = "≶" + output = [["Character", "≶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LessLess; with a semi-colon" do + input = "⪡" + output = [["Character", "⪡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LessSlantEqual; with a semi-colon" do + input = "⩽" + output = [["Character", "⩽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LessTilde; with a semi-colon" do + input = "≲" + output = [["Character", "≲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lfr; with a semi-colon" do + input = "𝔏" + output = [["Character", "𝔏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ll; with a semi-colon" do + input = "⋘" + output = [["Character", "⋘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: Lleftarrow; with a semi-colon" do + input = "⇚" + output = [["Character", "⇚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lmidot; with a semi-colon" do + input = "Ŀ" + output = [["Character", "Ŀ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LongLeftArrow; with a semi-colon" do + input = "⟵" + output = [["Character", "⟵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LongLeftRightArrow; with a semi-colon" do + input = "⟷" + output = [["Character", "⟷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LongRightArrow; with a semi-colon" do + input = "⟶" + output = [["Character", "⟶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Longleftarrow; with a semi-colon" do + input = "⟸" + output = [["Character", "⟸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Longleftrightarrow; with a semi-colon" do + input = "⟺" + output = [["Character", "⟺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Longrightarrow; with a semi-colon" do + input = "⟹" + output = [["Character", "⟹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lopf; with a semi-colon" do + input = "𝕃" + output = [["Character", "𝕃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LowerLeftArrow; with a semi-colon" do + input = "↙" + output = [["Character", "↙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: LowerRightArrow; with a semi-colon" do + input = "↘" + output = [["Character", "↘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lscr; with a semi-colon" do + input = "ℒ" + output = [["Character", "ℒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lsh; with a semi-colon" do + input = "↰" + output = [["Character", "↰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lstrok; with a semi-colon" do + input = "Ł" + output = [["Character", "Ł"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Lt; with a 
semi-colon" do + input = "≪" + output = [["Character", "≪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Map; with a semi-colon" do + input = "⤅" + output = [["Character", "⤅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Mcy; with a semi-colon" do + input = "М" + output = [["Character", "М"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: MediumSpace; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Mellintrf; with a semi-colon" do + input = "ℳ" + output = [["Character", "ℳ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Mfr; with a semi-colon" do + input = "𝔐" + output = [["Character", "𝔐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: MinusPlus; with a semi-colon" do + input = "∓" + output = [["Character", "∓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Mopf; with a semi-colon" do + input = "𝕄" + output = [["Character", "𝕄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Mscr; with a semi-colon" do + input = "ℳ" + output = [["Character", "ℳ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Mu; with a semi-colon" do + input = "Μ" + output = [["Character", "Μ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NJcy; with a semi-colon" do + input = "Њ" + output = [["Character", "Њ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Nacute; with a semi-colon" do + input = "Ń" + output = [["Character", "Ń"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ncaron; with a semi-colon" do + input = "Ň" + output = [["Character", "Ň"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ncedil; with a semi-colon" do + input = "Ņ" + output = [["Character", "Ņ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ncy; with a semi-colon" do + input = "Н" + output = [["Character", "Н"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NegativeMediumSpace; with a semi-colon" do + input = "​" + output = [["Character", "​"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NegativeThickSpace; with a semi-colon" do + input = "​" + output = [["Character", "​"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part24_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part24_test.exs new file mode 100644 index 00000000..369944ad --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part24_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart24Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: NegativeThinSpace; with a semi-colon" do + input = "​" + output = [["Character", "​"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NegativeVeryThinSpace; with a semi-colon" do + input = "​" + output = [["Character", "​"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NestedGreaterGreater; with a semi-colon" do + input = "≫" + output = [["Character", "≫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NestedLessLess; with a semi-colon" do + input = "≪" + output = [["Character", "≪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NewLine; with a semi-colon" do + input = " " + output = [["Character", "\n"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Nfr; with a semi-colon" do + input = "𝔑" + output = [["Character", "𝔑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NoBreak; with a semi-colon" do + input = "⁠" + output = [["Character", "⁠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NonBreakingSpace; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Nopf; with a semi-colon" do + input = "ℕ" + output = [["Character", "ℕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Not; with a semi-colon" do + input = "⫬" + output = 
[["Character", "⫬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotCongruent; with a semi-colon" do + input = "≢" + output = [["Character", "≢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotCupCap; with a semi-colon" do + input = "≭" + output = [["Character", "≭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotDoubleVerticalBar; with a semi-colon" do + input = "∦" + output = [["Character", "∦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotElement; with a semi-colon" do + input = "∉" + output = [["Character", "∉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotEqual; with a semi-colon" do + input = "≠" + output = [["Character", "≠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotEqualTilde; with a semi-colon" do + input = "≂̸" + output = [["Character", "≂̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotExists; with a semi-colon" do + input = "∄" + output = [["Character", "∄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotGreater; with a semi-colon" do + input = "≯" + output = [["Character", "≯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotGreaterEqual; with a semi-colon" do + input = "≱" + output = [["Character", "≱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotGreaterFullEqual; with a semi-colon" do + input = "≧̸" + output = [["Character", "≧̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotGreaterGreater; with a semi-colon" do + input = "≫̸" + output = [["Character", "≫̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotGreaterLess; with a semi-colon" do + input = "≹" + output = [["Character", "≹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotGreaterSlantEqual; with a semi-colon" do + input = "⩾̸" + output = [["Character", "⩾̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotGreaterTilde; with a semi-colon" do + input = "≵" + 
output = [["Character", "≵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotHumpDownHump; with a semi-colon" do + input = "≎̸" + output = [["Character", "≎̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotHumpEqual; with a semi-colon" do + input = "≏̸" + output = [["Character", "≏̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLeftTriangle; with a semi-colon" do + input = "⋪" + output = [["Character", "⋪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLeftTriangleBar; with a semi-colon" do + input = "⧏̸" + output = [["Character", "⧏̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLeftTriangleEqual; with a semi-colon" do + input = "⋬" + output = [["Character", "⋬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLess; with a semi-colon" do + input = "≮" + output = [["Character", "≮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLessEqual; with a semi-colon" do + input = "≰" + output = [["Character", "≰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLessGreater; with a semi-colon" do + input = "≸" + output = [["Character", "≸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLessLess; with a semi-colon" do + input = "≪̸" + output = [["Character", "≪̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLessSlantEqual; with a semi-colon" do + input = "⩽̸" + output = [["Character", "⩽̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotLessTilde; with a semi-colon" do + input = "≴" + output = [["Character", "≴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotNestedGreaterGreater; with a semi-colon" do + input = "⪢̸" + output = [["Character", "⪢̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotNestedLessLess; with a semi-colon" do + input = "⪡̸" + output = [["Character", "⪡̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotPrecedes; with a 
semi-colon" do + input = "⊀" + output = [["Character", "⊀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotPrecedesEqual; with a semi-colon" do + input = "⪯̸" + output = [["Character", "⪯̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotPrecedesSlantEqual; with a semi-colon" do + input = "⋠" + output = [["Character", "⋠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotReverseElement; with a semi-colon" do + input = "∌" + output = [["Character", "∌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotRightTriangle; with a semi-colon" do + input = "⋫" + output = [["Character", "⋫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotRightTriangleBar; with a semi-colon" do + input = "⧐̸" + output = [["Character", "⧐̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotRightTriangleEqual; with a semi-colon" do + input = "⋭" + output = [["Character", "⋭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSquareSubset; with a semi-colon" do + input = "⊏̸" + output = [["Character", "⊏̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSquareSubsetEqual; with a semi-colon" do + input = "⋢" + output = [["Character", "⋢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSquareSuperset; with a semi-colon" do + input = "⊐̸" + output = [["Character", "⊐̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSquareSupersetEqual; with a semi-colon" do + input = "⋣" + output = [["Character", "⋣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSubset; with a semi-colon" do + input = "⊂⃒" + output = [["Character", "⊂⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSubsetEqual; with a semi-colon" do + input = "⊈" + output = [["Character", "⊈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSucceeds; with a semi-colon" do + input = "⊁" + output = [["Character", "⊁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: NotSucceedsEqual; with a semi-colon" do + input = "⪰̸" + output = [["Character", "⪰̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSucceedsSlantEqual; with a semi-colon" do + input = "⋡" + output = [["Character", "⋡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSucceedsTilde; with a semi-colon" do + input = "≿̸" + output = [["Character", "≿̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSuperset; with a semi-colon" do + input = "⊃⃒" + output = [["Character", "⊃⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotSupersetEqual; with a semi-colon" do + input = "⊉" + output = [["Character", "⊉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotTilde; with a semi-colon" do + input = "≁" + output = [["Character", "≁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotTildeEqual; with a semi-colon" do + input = "≄" + output = [["Character", "≄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotTildeFullEqual; with a semi-colon" do + input = "≇" + output = [["Character", "≇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotTildeTilde; with a semi-colon" do + input = "≉" + output = [["Character", "≉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: NotVerticalBar; with a semi-colon" do + input = "∤" + output = [["Character", "∤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Nscr; with a semi-colon" do + input = "𝒩" + output = [["Character", "𝒩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ntilde without a semi-colon" do + input = "Ñ" + output = [["Character", "Ñ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ntilde; with a semi-colon" do + input = "Ñ" + output = [["Character", "Ñ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Nu; with a semi-colon" do + input = "Ν" + output = [["Character", "Ν"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: OElig; with a semi-colon" do + input = "Œ" + output = [["Character", "Œ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Oacute without a semi-colon" do + input = "Ó" + output = [["Character", "Ó"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Oacute; with a semi-colon" do + input = "Ó" + output = [["Character", "Ó"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ocirc without a semi-colon" do + input = "Ô" + output = [["Character", "Ô"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ocirc; with a semi-colon" do + input = "Ô" + output = [["Character", "Ô"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ocy; with a semi-colon" do + input = "О" + output = [["Character", "О"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Odblac; with a semi-colon" do + input = "Ő" + output = [["Character", "Ő"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ofr; with a semi-colon" do + input = "𝔒" + output = [["Character", "𝔒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ograve without a semi-colon" do + input = "Ò" + output = [["Character", "Ò"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ograve; with a semi-colon" do + input = "Ò" + output = [["Character", "Ò"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Omacr; with a semi-colon" do + input = "Ō" + output = [["Character", "Ō"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Omega; with a semi-colon" do + input = "Ω" + output = [["Character", "Ω"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Omicron; with a semi-colon" do + input = "Ο" + output = [["Character", "Ο"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Oopf; with a semi-colon" do + input = "𝕆" + output = [["Character", "𝕆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: OpenCurlyDoubleQuote; with a semi-colon" do + input = "“" + output = [["Character", "“"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: OpenCurlyQuote; with a semi-colon" do + input = "‘" + output = [["Character", "‘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Or; with a semi-colon" do + input = "⩔" + output = [["Character", "⩔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Oscr; with a semi-colon" do + input = "𝒪" + output = [["Character", "𝒪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Oslash without a semi-colon" do + input = "Ø" + output = [["Character", "Ø"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Oslash; with a semi-colon" do + input = "Ø" + output = [["Character", "Ø"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Otilde without a semi-colon" do + input = "Õ" + output = [["Character", "Õ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Otilde; with a semi-colon" do + input = "Õ" + output = [["Character", "Õ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Otimes; with a semi-colon" do + input = "⨷" + output = [["Character", "⨷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ouml without a semi-colon" do + input = "Ö" + output = [["Character", "Ö"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ouml; with a semi-colon" do + input = "Ö" + output = [["Character", "Ö"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: OverBar; with a semi-colon" do + input = "‾" + output = [["Character", "‾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: OverBrace; with a semi-colon" do + input = "⏞" + output = [["Character", "⏞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: OverBracket; with a semi-colon" do + input = "⎴" + output = [["Character", "⎴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: OverParenthesis; with a semi-colon" do + input = "⏜" + output = [["Character", "⏜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Named entity: PartialD; with a semi-colon" do + input = "∂" + output = [["Character", "∂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Pcy; with a semi-colon" do + input = "П" + output = [["Character", "П"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Pfr; with a semi-colon" do + input = "𝔓" + output = [["Character", "𝔓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Phi; with a semi-colon" do + input = "Φ" + output = [["Character", "Φ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Pi; with a semi-colon" do + input = "Π" + output = [["Character", "Π"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: PlusMinus; with a semi-colon" do + input = "±" + output = [["Character", "±"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part25_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part25_test.exs new file mode 100644 index 00000000..32a2cae0 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part25_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart25Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: Poincareplane; with a semi-colon" do + input = "ℌ" + output = [["Character", "ℌ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Popf; with a semi-colon" do + input = "ℙ" + output = [["Character", "ℙ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Pr; with a semi-colon" do + input = "⪻" + output = [["Character", "⪻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Precedes; with a semi-colon" do + input = "≺" + output = [["Character", "≺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: PrecedesEqual; with a semi-colon" do + input = "⪯" + output = [["Character", "⪯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: PrecedesSlantEqual; with a semi-colon" do + input = "≼" + output = [["Character", "≼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: PrecedesTilde; with a semi-colon" do + input = "≾" + output = [["Character", "≾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Prime; with a semi-colon" do + input = "″" + output = [["Character", "″"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Product; with a semi-colon" do + input = "∏" + output = [["Character", "∏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Proportion; with a semi-colon" do + input = "∷" + output = [["Character", "∷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Proportional; with a semi-colon" do + input = "∝" + output = [["Character", "∝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Pscr; with a semi-colon" do + input = "𝒫" + output = [["Character", "𝒫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Psi; with a semi-colon" do + input = "Ψ" + output = [["Character", "Ψ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: QUOT without a semi-colon" do + input = "&QUOT" + output = [["Character", "\""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert
result.tokens == output + end + + test "tokenize/1 Named entity: QUOT; with a semi-colon" do + input = "&QUOT;" + output = [["Character", "\""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Qfr; with a semi-colon" do + input = "𝔔" + output = [["Character", "𝔔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Qopf; with a semi-colon" do + input = "ℚ" + output = [["Character", "ℚ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Qscr; with a semi-colon" do + input = "𝒬" + output = [["Character", "𝒬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RBarr; with a semi-colon" do + input = "⤐" + output = [["Character", "⤐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: REG without a semi-colon" do + input = "®" + output = [["Character", "®"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: REG; with a semi-colon" do + input = "®" + output = [["Character", "®"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Racute; with a semi-colon" do + input = "Ŕ" + output = [["Character", "Ŕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rang; with a semi-colon" do + input = "⟫" + output = [["Character", "⟫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rarr; with a semi-colon" do + input = "↠" + output = [["Character", "↠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rarrtl; with a semi-colon" do + input = "⤖" + output = [["Character", "⤖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rcaron; with a semi-colon" do + input = "Ř" + output = [["Character", "Ř"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rcedil; with a semi-colon" do + input = "Ŗ" + output = [["Character", "Ŗ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rcy; with a semi-colon" do + input = "Р" + output = [["Character", "Р"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Re; with a semi-colon" do + input = "ℜ" + output = [["Character",
"ℜ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ReverseElement; with a semi-colon" do + input = "∋" + output = [["Character", "∋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ReverseEquilibrium; with a semi-colon" do + input = "⇋" + output = [["Character", "⇋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ReverseUpEquilibrium; with a semi-colon" do + input = "⥯" + output = [["Character", "⥯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rfr; with a semi-colon" do + input = "ℜ" + output = [["Character", "ℜ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rho; with a semi-colon" do + input = "Ρ" + output = [["Character", "Ρ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightAngleBracket; with a semi-colon" do + input = "⟩" + output = [["Character", "⟩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightArrow; with a semi-colon" do + input = "→" + output = [["Character", "→"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightArrowBar; with a semi-colon" do + input = "⇥" + output = [["Character", "⇥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightArrowLeftArrow; with a semi-colon" do + input = "⇄" + output = [["Character", "⇄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightCeiling; with a semi-colon" do + input = "⌉" + output = [["Character", "⌉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightDoubleBracket; with a semi-colon" do + input = "⟧" + output = [["Character", "⟧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightDownTeeVector; with a semi-colon" do + input = "⥝" + output = [["Character", "⥝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightDownVector; with a semi-colon" do + input = "⇂" + output = [["Character", "⇂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightDownVectorBar; with a semi-colon" do + input = "⥕" + output = 
[["Character", "⥕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightFloor; with a semi-colon" do + input = "⌋" + output = [["Character", "⌋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightTee; with a semi-colon" do + input = "⊢" + output = [["Character", "⊢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightTeeArrow; with a semi-colon" do + input = "↦" + output = [["Character", "↦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightTeeVector; with a semi-colon" do + input = "⥛" + output = [["Character", "⥛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightTriangle; with a semi-colon" do + input = "⊳" + output = [["Character", "⊳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightTriangleBar; with a semi-colon" do + input = "⧐" + output = [["Character", "⧐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightTriangleEqual; with a semi-colon" do + input = "⊵" + output = [["Character", "⊵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightUpDownVector; with a semi-colon" do + input = "⥏" + output = [["Character", "⥏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightUpTeeVector; with a semi-colon" do + input = "⥜" + output = [["Character", "⥜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightUpVector; with a semi-colon" do + input = "↾" + output = [["Character", "↾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightUpVectorBar; with a semi-colon" do + input = "⥔" + output = [["Character", "⥔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightVector; with a semi-colon" do + input = "⇀" + output = [["Character", "⇀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RightVectorBar; with a semi-colon" do + input = "⥓" + output = [["Character", "⥓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rightarrow; with a semi-colon" do + input = "⇒" + output = 
[["Character", "⇒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ropf; with a semi-colon" do + input = "ℝ" + output = [["Character", "ℝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RoundImplies; with a semi-colon" do + input = "⥰" + output = [["Character", "⥰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rrightarrow; with a semi-colon" do + input = "⇛" + output = [["Character", "⇛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rscr; with a semi-colon" do + input = "ℛ" + output = [["Character", "ℛ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Rsh; with a semi-colon" do + input = "↱" + output = [["Character", "↱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: RuleDelayed; with a semi-colon" do + input = "⧴" + output = [["Character", "⧴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SHCHcy; with a semi-colon" do + input = "Щ" + output = [["Character", "Щ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SHcy; with a semi-colon" do + input = "Ш" + output = [["Character", "Ш"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SOFTcy; with a semi-colon" do + input = "Ь" + output = [["Character", "Ь"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sacute; with a semi-colon" do + input = "Ś" + output = [["Character", "Ś"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sc; with a semi-colon" do + input = "⪼" + output = [["Character", "⪼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Scaron; with a semi-colon" do + input = "Š" + output = [["Character", "Š"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Scedil; with a semi-colon" do + input = "Ş" + output = [["Character", "Ş"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Scirc; with a semi-colon" do + input = "Ŝ" + output = [["Character", "Ŝ"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Scy; with a semi-colon" do + input = "С" + output = [["Character", "С"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sfr; with a semi-colon" do + input = "𝔖" + output = [["Character", "𝔖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ShortDownArrow; with a semi-colon" do + input = "↓" + output = [["Character", "↓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ShortLeftArrow; with a semi-colon" do + input = "←" + output = [["Character", "←"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ShortRightArrow; with a semi-colon" do + input = "→" + output = [["Character", "→"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ShortUpArrow; with a semi-colon" do + input = "↑" + output = [["Character", "↑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sigma; with a semi-colon" do + input = "Σ" + output = [["Character", "Σ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SmallCircle; with a semi-colon" do + input = "∘" + output = [["Character", "∘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sopf; with a semi-colon" do + input = "𝕊" + output = [["Character", "𝕊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sqrt; with a semi-colon" do + input = "√" + output = [["Character", "√"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Square; with a semi-colon" do + input = "□" + output = [["Character", "□"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SquareIntersection; with a semi-colon" do + input = "⊓" + output = [["Character", "⊓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SquareSubset; with a semi-colon" do + input = "⊏" + output = [["Character", "⊏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SquareSubsetEqual; with a semi-colon" do + input = "⊑" + output = [["Character", "⊑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: SquareSuperset; with a semi-colon" do + input = "⊐" + output = [["Character", "⊐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SquareSupersetEqual; with a semi-colon" do + input = "⊒" + output = [["Character", "⊒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SquareUnion; with a semi-colon" do + input = "⊔" + output = [["Character", "⊔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sscr; with a semi-colon" do + input = "𝒮" + output = [["Character", "𝒮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Star; with a semi-colon" do + input = "⋆" + output = [["Character", "⋆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sub; with a semi-colon" do + input = "⋐" + output = [["Character", "⋐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Subset; with a semi-colon" do + input = "⋐" + output = [["Character", "⋐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SubsetEqual; with a semi-colon" do + input = "⊆" + output = [["Character", "⊆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Succeeds; with a semi-colon" do + input = "≻" + output = [["Character", "≻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SucceedsEqual; with a semi-colon" do + input = "⪰" + output = [["Character", "⪰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SucceedsSlantEqual; with a semi-colon" do + input = "≽" + output = [["Character", "≽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SucceedsTilde; with a semi-colon" do + input = "≿" + output = [["Character", "≿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SuchThat; with a semi-colon" do + input = "∋" + output = [["Character", "∋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Sum; with a semi-colon" do + input = "∑" + output = [["Character", "∑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: Sup; with a semi-colon" do + input = "⋑" + output = [["Character", "⋑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part26_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part26_test.exs new file mode 100644 index 00000000..2fe01647 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part26_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart26Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: Superset; with a semi-colon" do + input = "⊃" + output = [["Character", "⊃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: SupersetEqual; with a semi-colon" do + input = "⊇" + output = [["Character", "⊇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Supset; with a semi-colon" do + input = "⋑" + output = [["Character", "⋑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: THORN without a semi-colon" do + input = "Þ" + output = [["Character", "Þ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: THORN; with a semi-colon" do + input = "Þ" + output = [["Character", "Þ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: TRADE; with a semi-colon" do + input = "™" + output = [["Character", "™"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: TSHcy; with a semi-colon" do + input = "Ћ" + output = [["Character", "Ћ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: TScy; with a semi-colon" do + input = "Ц" + output = [["Character", "Ц"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tab; with a semi-colon" do + input = " " + output = [["Character", "\t"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tau; with a semi-colon" do + input = "Τ" + output = [["Character", "Τ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tcaron; with a semi-colon" do + input = "Ť" + output = [["Character", "Ť"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tcedil; 
with a semi-colon" do + input = "Ţ" + output = [["Character", "Ţ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tcy; with a semi-colon" do + input = "Т" + output = [["Character", "Т"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tfr; with a semi-colon" do + input = "𝔗" + output = [["Character", "𝔗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Therefore; with a semi-colon" do + input = "∴" + output = [["Character", "∴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Theta; with a semi-colon" do + input = "Θ" + output = [["Character", "Θ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ThickSpace; with a semi-colon" do + input = "  " + output = [["Character", "  "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ThinSpace; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tilde; with a semi-colon" do + input = "∼" + output = [["Character", "∼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: TildeEqual; with a semi-colon" do + input = "≃" + output = [["Character", "≃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: TildeFullEqual; with a semi-colon" do + input = "≅" + output = [["Character", "≅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: TildeTilde; with a semi-colon" do + input = "≈" + output = [["Character", "≈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Topf; with a semi-colon" do + input = "𝕋" + output = [["Character", "𝕋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: TripleDot; with a semi-colon" do + input = "⃛" + output = [["Character", "⃛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tscr; with a semi-colon" do + input = "𝒯" + output = [["Character", "𝒯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Tstrok; with a semi-colon" do + input = "Ŧ" + output = [["Character", "Ŧ"]] + + result = + input + 
|> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uacute without a semi-colon" do + input = "Ú" + output = [["Character", "Ú"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uacute; with a semi-colon" do + input = "Ú" + output = [["Character", "Ú"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uarr; with a semi-colon" do + input = "↟" + output = [["Character", "↟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uarrocir; with a semi-colon" do + input = "⥉" + output = [["Character", "⥉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ubrcy; with a semi-colon" do + input = "Ў" + output = [["Character", "Ў"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ubreve; with a semi-colon" do + input = "Ŭ" + output = [["Character", "Ŭ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ucirc without a semi-colon" do + input = "Û" + output = [["Character", "Û"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ucirc; with a semi-colon" do + input = "Û" + output = [["Character", "Û"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ucy; with a semi-colon" do + input = "У" + output = [["Character", "У"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Udblac; with a semi-colon" do + input = "Ű" + output = [["Character", "Ű"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ufr; with a semi-colon" do + input = "𝔘" + output = [["Character", "𝔘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ugrave without a semi-colon" do + input = "Ù" + output = [["Character", "Ù"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ugrave; with a semi-colon" do + input = "Ù" + output = [["Character", "Ù"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Umacr; with a semi-colon" do + input = "Ū" + output = [["Character", "Ū"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + 
+ test "tokenize/1 Named entity: UnderBar; with a semi-colon" do + input = "_" + output = [["Character", "_"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UnderBrace; with a semi-colon" do + input = "⏟" + output = [["Character", "⏟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UnderBracket; with a semi-colon" do + input = "⎵" + output = [["Character", "⎵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UnderParenthesis; with a semi-colon" do + input = "⏝" + output = [["Character", "⏝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Union; with a semi-colon" do + input = "⋃" + output = [["Character", "⋃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UnionPlus; with a semi-colon" do + input = "⊎" + output = [["Character", "⊎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uogon; with a semi-colon" do + input = "Ų" + output = [["Character", "Ų"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uopf; with a semi-colon" do + input = "𝕌" + output = [["Character", "𝕌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpArrow; with a semi-colon" do + input = "↑" + output = [["Character", "↑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpArrowBar; with a semi-colon" do + input = "⤒" + output = [["Character", "⤒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpArrowDownArrow; with a semi-colon" do + input = "⇅" + output = [["Character", "⇅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpDownArrow; with a semi-colon" do + input = "↕" + output = [["Character", "↕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpEquilibrium; with a semi-colon" do + input = "⥮" + output = [["Character", "⥮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpTee; with a semi-colon" do + input = "⊥" + output = [["Character", "⊥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpTeeArrow; with a semi-colon" 
do + input = "↥" + output = [["Character", "↥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uparrow; with a semi-colon" do + input = "⇑" + output = [["Character", "⇑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Updownarrow; with a semi-colon" do + input = "⇕" + output = [["Character", "⇕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpperLeftArrow; with a semi-colon" do + input = "↖" + output = [["Character", "↖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: UpperRightArrow; with a semi-colon" do + input = "↗" + output = [["Character", "↗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Upsi; with a semi-colon" do + input = "ϒ" + output = [["Character", "ϒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Upsilon; with a semi-colon" do + input = "Υ" + output = [["Character", "Υ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uring; with a semi-colon" do + input = "Ů" + output = [["Character", "Ů"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uscr; with a semi-colon" do + input = "𝒰" + output = [["Character", "𝒰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Utilde; with a semi-colon" do + input = "Ũ" + output = [["Character", "Ũ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uuml without a semi-colon" do + input = "Ü" + output = [["Character", "Ü"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Uuml; with a semi-colon" do + input = "Ü" + output = [["Character", "Ü"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: VDash; with a semi-colon" do + input = "⊫" + output = [["Character", "⊫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vbar; with a semi-colon" do + input = "⫫" + output = [["Character", "⫫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vcy; with a semi-colon" do + input = "В" + output = [["Character", "В"]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vdash; with a semi-colon" do + input = "⊩" + output = [["Character", "⊩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vdashl; with a semi-colon" do + input = "⫦" + output = [["Character", "⫦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vee; with a semi-colon" do + input = "⋁" + output = [["Character", "⋁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Verbar; with a semi-colon" do + input = "‖" + output = [["Character", "‖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vert; with a semi-colon" do + input = "‖" + output = [["Character", "‖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: VerticalBar; with a semi-colon" do + input = "∣" + output = [["Character", "∣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: VerticalLine; with a semi-colon" do + input = "|" + output = [["Character", "|"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: VerticalSeparator; with a semi-colon" do + input = "❘" + output = [["Character", "❘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: VerticalTilde; with a semi-colon" do + input = "≀" + output = [["Character", "≀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: VeryThinSpace; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vfr; with a semi-colon" do + input = "𝔙" + output = [["Character", "𝔙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vopf; with a semi-colon" do + input = "𝕍" + output = [["Character", "𝕍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vscr; with a semi-colon" do + input = "𝒱" + output = [["Character", "𝒱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Vvdash; with a semi-colon" do + input = "⊪" + output = [["Character", "⊪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + 
+ test "tokenize/1 Named entity: Wcirc; with a semi-colon" do + input = "Ŵ" + output = [["Character", "Ŵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Wedge; with a semi-colon" do + input = "⋀" + output = [["Character", "⋀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Wfr; with a semi-colon" do + input = "𝔚" + output = [["Character", "𝔚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Wopf; with a semi-colon" do + input = "𝕎" + output = [["Character", "𝕎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Wscr; with a semi-colon" do + input = "𝒲" + output = [["Character", "𝒲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Xfr; with a semi-colon" do + input = "𝔛" + output = [["Character", "𝔛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Xi; with a semi-colon" do + input = "Ξ" + output = [["Character", "Ξ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Xopf; with a semi-colon" do + input = "𝕏" + output = [["Character", "𝕏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Xscr; with a semi-colon" do + input = "𝒳" + output = [["Character", "𝒳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: YAcy; with a semi-colon" do + input = "Я" + output = [["Character", "Я"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: YIcy; with a semi-colon" do + input = "Ї" + output = [["Character", "Ї"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: YUcy; with a semi-colon" do + input = "Ю" + output = [["Character", "Ю"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Yacute without a semi-colon" do + input = "Ý" + output = [["Character", "Ý"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Yacute; with a semi-colon" do + input = "Ý" + output = [["Character", "Ý"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ycirc; with a semi-colon" do + input = "Ŷ" + output = [["Character", "Ŷ"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Ycy; with a semi-colon" do + input = "Ы" + output = [["Character", "Ы"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Yfr; with a semi-colon" do + input = "𝔜" + output = [["Character", "𝔜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part27_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part27_test.exs new file mode 100644 index 00000000..7d05573e --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part27_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart27Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: Yopf; with a semi-colon" do + input = "𝕐" + output = [["Character", "𝕐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Yscr; with a semi-colon" do + input = "𝒴" + output = [["Character", "𝒴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Yuml; with a semi-colon" do + input = "Ÿ" + output = [["Character", "Ÿ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ZHcy; with a semi-colon" do + input = "Ж" + output = [["Character", "Ж"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zacute; with a semi-colon" do + input = "Ź" + output = [["Character", "Ź"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zcaron; with a semi-colon" do + input = "Ž" + output = [["Character", "Ž"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zcy; with a semi-colon" do + input = "З" + output = [["Character", "З"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zdot; with a semi-colon" do + input = "Ż" + output = [["Character", "Ż"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ZeroWidthSpace; with a semi-colon" do + input = "​" + output = [["Character", "​"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zeta; with a semi-colon" do + input = "Ζ" + output = [["Character", "Ζ"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zfr; with a semi-colon" do + input = "ℨ" + output = [["Character", "ℨ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zopf; with a semi-colon" do + input = "ℤ" + output = [["Character", "ℤ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: Zscr; with a semi-colon" do + input = "𝒵" + output = [["Character", "𝒵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aacute without a semi-colon" do + input = "á" + output = [["Character", "á"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aacute; with a semi-colon" do + input = "á" + output = [["Character", "á"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: abreve; with a semi-colon" do + input = "ă" + output = [["Character", "ă"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ac; with a semi-colon" do + input = "∾" + output = [["Character", "∾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: acE; with a semi-colon" do + input = "∾̳" + output = [["Character", "∾̳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: acd; with a semi-colon" do + input = "∿" + output = [["Character", "∿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: acirc without a semi-colon" do + input = "â" + output = [["Character", "â"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: acirc; with a semi-colon" do + input = "â" + output = [["Character", "â"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: acute without a semi-colon" do + input = "´" + output = [["Character", "´"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: acute; with a semi-colon" do + input = "´" + output = [["Character", "´"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: acy; with a semi-colon" do + input = "а" + output = [["Character", "а"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aelig 
without a semi-colon" do + input = "æ" + output = [["Character", "æ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aelig; with a semi-colon" do + input = "æ" + output = [["Character", "æ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: af; with a semi-colon" do + input = "⁡" + output = [["Character", "⁡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: afr; with a semi-colon" do + input = "𝔞" + output = [["Character", "𝔞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: agrave without a semi-colon" do + input = "à" + output = [["Character", "à"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: agrave; with a semi-colon" do + input = "à" + output = [["Character", "à"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: alefsym; with a semi-colon" do + input = "ℵ" + output = [["Character", "ℵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aleph; with a semi-colon" do + input = "ℵ" + output = [["Character", "ℵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: alpha; with a semi-colon" do + input = "α" + output = [["Character", "α"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: amacr; with a semi-colon" do + input = "ā" + output = [["Character", "ā"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: amalg; with a semi-colon" do + input = "⨿" + output = [["Character", "⨿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: amp without a semi-colon" do + input = "&" + output = [["Character", "&"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: amp; with a semi-colon" do + input = "&" + output = [["Character", "&"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: and; with a semi-colon" do + input = "∧" + output = [["Character", "∧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: andand; with a semi-colon" do + input = "⩕" + output = [["Character", "⩕"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: andd; with a semi-colon" do + input = "⩜" + output = [["Character", "⩜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: andslope; with a semi-colon" do + input = "⩘" + output = [["Character", "⩘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: andv; with a semi-colon" do + input = "⩚" + output = [["Character", "⩚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ang; with a semi-colon" do + input = "∠" + output = [["Character", "∠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ange; with a semi-colon" do + input = "⦤" + output = [["Character", "⦤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angle; with a semi-colon" do + input = "∠" + output = [["Character", "∠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsd; with a semi-colon" do + input = "∡" + output = [["Character", "∡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsdaa; with a semi-colon" do + input = "⦨" + output = [["Character", "⦨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsdab; with a semi-colon" do + input = "⦩" + output = [["Character", "⦩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsdac; with a semi-colon" do + input = "⦪" + output = [["Character", "⦪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsdad; with a semi-colon" do + input = "⦫" + output = [["Character", "⦫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsdae; with a semi-colon" do + input = "⦬" + output = [["Character", "⦬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsdaf; with a semi-colon" do + input = "⦭" + output = [["Character", "⦭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angmsdag; with a semi-colon" do + input = "⦮" + output = [["Character", "⦮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Named entity: angmsdah; with a semi-colon" do + input = "⦯" + output = [["Character", "⦯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angrt; with a semi-colon" do + input = "∟" + output = [["Character", "∟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angrtvb; with a semi-colon" do + input = "⊾" + output = [["Character", "⊾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angrtvbd; with a semi-colon" do + input = "⦝" + output = [["Character", "⦝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angsph; with a semi-colon" do + input = "∢" + output = [["Character", "∢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angst; with a semi-colon" do + input = "Å" + output = [["Character", "Å"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: angzarr; with a semi-colon" do + input = "⍼" + output = [["Character", "⍼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aogon; with a semi-colon" do + input = "ą" + output = [["Character", "ą"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aopf; with a semi-colon" do + input = "𝕒" + output = [["Character", "𝕒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ap; with a semi-colon" do + input = "≈" + output = [["Character", "≈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: apE; with a semi-colon" do + input = "⩰" + output = [["Character", "⩰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: apacir; with a semi-colon" do + input = "⩯" + output = [["Character", "⩯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ape; with a semi-colon" do + input = "≊" + output = [["Character", "≊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: apid; with a semi-colon" do + input = "≋" + output = [["Character", "≋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: apos; with a semi-colon" do + input = "'" + output = [["Character", "'"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: approx; with a semi-colon" do + input = "≈" + output = [["Character", "≈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: approxeq; with a semi-colon" do + input = "≊" + output = [["Character", "≊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aring without a semi-colon" do + input = "å" + output = [["Character", "å"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: aring; with a semi-colon" do + input = "å" + output = [["Character", "å"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ascr; with a semi-colon" do + input = "𝒶" + output = [["Character", "𝒶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ast; with a semi-colon" do + input = "*" + output = [["Character", "*"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: asymp; with a semi-colon" do + input = "≈" + output = [["Character", "≈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: asympeq; with a semi-colon" do + input = "≍" + output = [["Character", "≍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: atilde without a semi-colon" do + input = "ã" + output = [["Character", "ã"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: atilde; with a semi-colon" do + input = "ã" + output = [["Character", "ã"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: auml without a semi-colon" do + input = "ä" + output = [["Character", "ä"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: auml; with a semi-colon" do + input = "ä" + output = [["Character", "ä"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: awconint; with a semi-colon" do + input = "∳" + output = [["Character", "∳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: awint; with a semi-colon" do + input = "⨑" + output = [["Character", "⨑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + 
test "tokenize/1 Named entity: bNot; with a semi-colon" do + input = "⫭" + output = [["Character", "⫭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: backcong; with a semi-colon" do + input = "≌" + output = [["Character", "≌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: backepsilon; with a semi-colon" do + input = "϶" + output = [["Character", "϶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: backprime; with a semi-colon" do + input = "‵" + output = [["Character", "‵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: backsim; with a semi-colon" do + input = "∽" + output = [["Character", "∽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: backsimeq; with a semi-colon" do + input = "⋍" + output = [["Character", "⋍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: barvee; with a semi-colon" do + input = "⊽" + output = [["Character", "⊽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: barwed; with a semi-colon" do + input = "⌅" + output = [["Character", "⌅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: barwedge; with a semi-colon" do + input = "⌅" + output = [["Character", "⌅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bbrk; with a semi-colon" do + input = "⎵" + output = [["Character", "⎵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bbrktbrk; with a semi-colon" do + input = "⎶" + output = [["Character", "⎶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bcong; with a semi-colon" do + input = "≌" + output = [["Character", "≌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bcy; with a semi-colon" do + input = "б" + output = [["Character", "б"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bdquo; with a semi-colon" do + input = "„" + output = [["Character", "„"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: becaus; with a semi-colon" do + input = "∵" + output = [["Character", 
"∵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: because; with a semi-colon" do + input = "∵" + output = [["Character", "∵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bemptyv; with a semi-colon" do + input = "⦰" + output = [["Character", "⦰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bepsi; with a semi-colon" do + input = "϶" + output = [["Character", "϶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part28_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part28_test.exs new file mode 100644 index 00000000..f3fc9631 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part28_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart28Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: bernou; with a semi-colon" do + input = "ℬ" + output = [["Character", "ℬ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: beta; with a semi-colon" do + input = "β" + output = [["Character", "β"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: beth; with a semi-colon" do + input = "ℶ" + output = [["Character", "ℶ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: between; with a semi-colon" do + input = "≬" + output = [["Character", "≬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bfr; with a semi-colon" do + input = "𝔟" + output = [["Character", "𝔟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigcap; with a semi-colon" do + input = "⋂" + output = [["Character", "⋂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigcirc; with a semi-colon" do + input = "◯" + output = [["Character", "◯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigcup; with a semi-colon" do + input = "⋃" + output = [["Character", "⋃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigodot; with a semi-colon" do + input = "⨀" + output = [["Character", "⨀"]] + + result 
= + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigoplus; with a semi-colon" do + input = "⨁" + output = [["Character", "⨁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigotimes; with a semi-colon" do + input = "⨂" + output = [["Character", "⨂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigsqcup; with a semi-colon" do + input = "⨆" + output = [["Character", "⨆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigstar; with a semi-colon" do + input = "★" + output = [["Character", "★"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigtriangledown; with a semi-colon" do + input = "▽" + output = [["Character", "▽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigtriangleup; with a semi-colon" do + input = "△" + output = [["Character", "△"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: biguplus; with a semi-colon" do + input = "⨄" + output = [["Character", "⨄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigvee; with a semi-colon" do + input = "⋁" + output = [["Character", "⋁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bigwedge; with a semi-colon" do + input = "⋀" + output = [["Character", "⋀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bkarow; with a semi-colon" do + input = "⤍" + output = [["Character", "⤍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blacklozenge; with a semi-colon" do + input = "⧫" + output = [["Character", "⧫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blacksquare; with a semi-colon" do + input = "▪" + output = [["Character", "▪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blacktriangle; with a semi-colon" do + input = "▴" + output = [["Character", "▴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blacktriangledown; with a semi-colon" do + input = "▾" + output = [["Character", "▾"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blacktriangleleft; with a semi-colon" do + input = "◂" + output = [["Character", "◂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blacktriangleright; with a semi-colon" do + input = "▸" + output = [["Character", "▸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blank; with a semi-colon" do + input = "␣" + output = [["Character", "␣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blk12; with a semi-colon" do + input = "▒" + output = [["Character", "▒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blk14; with a semi-colon" do + input = "░" + output = [["Character", "░"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: blk34; with a semi-colon" do + input = "▓" + output = [["Character", "▓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: block; with a semi-colon" do + input = "█" + output = [["Character", "█"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bne; with a semi-colon" do + input = "=⃥" + output = [["Character", "=⃥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bnequiv; with a semi-colon" do + input = "≡⃥" + output = [["Character", "≡⃥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bnot; with a semi-colon" do + input = "⌐" + output = [["Character", "⌐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bopf; with a semi-colon" do + input = "𝕓" + output = [["Character", "𝕓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bot; with a semi-colon" do + input = "⊥" + output = [["Character", "⊥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bottom; with a semi-colon" do + input = "⊥" + output = [["Character", "⊥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bowtie; with a semi-colon" do + input = "⋈" + output = [["Character", "⋈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: boxDL; with a semi-colon" do + input = "╗" + output = [["Character", "╗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxDR; with a semi-colon" do + input = "╔" + output = [["Character", "╔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxDl; with a semi-colon" do + input = "╖" + output = [["Character", "╖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxDr; with a semi-colon" do + input = "╓" + output = [["Character", "╓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxH; with a semi-colon" do + input = "═" + output = [["Character", "═"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxHD; with a semi-colon" do + input = "╦" + output = [["Character", "╦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxHU; with a semi-colon" do + input = "╩" + output = [["Character", "╩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxHd; with a semi-colon" do + input = "╤" + output = [["Character", "╤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxHu; with a semi-colon" do + input = "╧" + output = [["Character", "╧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxUL; with a semi-colon" do + input = "╝" + output = [["Character", "╝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxUR; with a semi-colon" do + input = "╚" + output = [["Character", "╚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxUl; with a semi-colon" do + input = "╜" + output = [["Character", "╜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxUr; with a semi-colon" do + input = "╙" + output = [["Character", "╙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxV; with a semi-colon" do + input = "║" + output = [["Character", "║"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxVH; with a semi-colon" do + input = "╬" + output = [["Character", "╬"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxVL; with a semi-colon" do + input = "╣" + output = [["Character", "╣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxVR; with a semi-colon" do + input = "╠" + output = [["Character", "╠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxVh; with a semi-colon" do + input = "╫" + output = [["Character", "╫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxVl; with a semi-colon" do + input = "╢" + output = [["Character", "╢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxVr; with a semi-colon" do + input = "╟" + output = [["Character", "╟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxbox; with a semi-colon" do + input = "⧉" + output = [["Character", "⧉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxdL; with a semi-colon" do + input = "╕" + output = [["Character", "╕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxdR; with a semi-colon" do + input = "╒" + output = [["Character", "╒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxdl; with a semi-colon" do + input = "┐" + output = [["Character", "┐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxdr; with a semi-colon" do + input = "┌" + output = [["Character", "┌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxh; with a semi-colon" do + input = "─" + output = [["Character", "─"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxhD; with a semi-colon" do + input = "╥" + output = [["Character", "╥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxhU; with a semi-colon" do + input = "╨" + output = [["Character", "╨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxhd; with a semi-colon" do + input = "┬" + output = [["Character", "┬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: boxhu; with a semi-colon" do + input = "┴" + output = [["Character", "┴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxminus; with a semi-colon" do + input = "⊟" + output = [["Character", "⊟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxplus; with a semi-colon" do + input = "⊞" + output = [["Character", "⊞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxtimes; with a semi-colon" do + input = "⊠" + output = [["Character", "⊠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxuL; with a semi-colon" do + input = "╛" + output = [["Character", "╛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxuR; with a semi-colon" do + input = "╘" + output = [["Character", "╘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxul; with a semi-colon" do + input = "┘" + output = [["Character", "┘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxur; with a semi-colon" do + input = "└" + output = [["Character", "└"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxv; with a semi-colon" do + input = "│" + output = [["Character", "│"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxvH; with a semi-colon" do + input = "╪" + output = [["Character", "╪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxvL; with a semi-colon" do + input = "╡" + output = [["Character", "╡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxvR; with a semi-colon" do + input = "╞" + output = [["Character", "╞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxvh; with a semi-colon" do + input = "┼" + output = [["Character", "┼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxvl; with a semi-colon" do + input = "┤" + output = [["Character", "┤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: boxvr; with a semi-colon" do + input = "├" + output = [["Character", "├"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bprime; with a semi-colon" do + input = "‵" + output = [["Character", "‵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: breve; with a semi-colon" do + input = "˘" + output = [["Character", "˘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: brvbar without a semi-colon" do + input = "¦" + output = [["Character", "¦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: brvbar; with a semi-colon" do + input = "¦" + output = [["Character", "¦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bscr; with a semi-colon" do + input = "𝒷" + output = [["Character", "𝒷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bsemi; with a semi-colon" do + input = "⁏" + output = [["Character", "⁏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bsim; with a semi-colon" do + input = "∽" + output = [["Character", "∽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bsime; with a semi-colon" do + input = "⋍" + output = [["Character", "⋍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bsol; with a semi-colon" do + input = "&bsol;" + output = [["Character", "\\"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bsolb; with a semi-colon" do + input = "⧅" + output = [["Character", "⧅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bsolhsub; with a semi-colon" do + input = "⟈" + output = [["Character", "⟈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bull; with a semi-colon" do + input = "•" + output = [["Character", "•"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bullet; with a semi-colon" do + input = "•" + output = [["Character", "•"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bump; with a semi-colon" do + input = "≎" + output = [["Character", "≎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end
+ + test "tokenize/1 Named entity: bumpE; with a semi-colon" do + input = "⪮" + output = [["Character", "⪮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bumpe; with a semi-colon" do + input = "≏" + output = [["Character", "≏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: bumpeq; with a semi-colon" do + input = "≏" + output = [["Character", "≏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cacute; with a semi-colon" do + input = "ć" + output = [["Character", "ć"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cap; with a semi-colon" do + input = "∩" + output = [["Character", "∩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part29_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part29_test.exs new file mode 100644 index 00000000..08febdcb --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part29_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart29Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: capand; with a semi-colon" do + input = "⩄" + output = [["Character", "⩄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: capbrcup; with a semi-colon" do + input = "⩉" + output = [["Character", "⩉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: capcap; with a semi-colon" do + input = "⩋" + output = [["Character", "⩋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: capcup; with a semi-colon" do + input = "⩇" + output = [["Character", "⩇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: capdot; with a semi-colon" do + input = "⩀" + output = [["Character", "⩀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: caps; with a semi-colon" do + input = "∩︀" + output = [["Character", "∩︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: caret; with a semi-colon" do + input = "⁁" + output = [["Character", "⁁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: caron; with a semi-colon" do + input = "ˇ" + output = [["Character", "ˇ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ccaps; with a semi-colon" do + input = "⩍" + output = [["Character", "⩍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ccaron; with a semi-colon" do + input = "č" + output = [["Character", "č"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ccedil without a semi-colon" do + input = "ç" + output = [["Character", "ç"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ccedil; with a semi-colon" do + input = "ç" + output = [["Character", "ç"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ccirc; with a semi-colon" do + input = "ĉ" + output = [["Character", "ĉ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ccups; with a semi-colon" do + input = "⩌" + output = [["Character", "⩌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ccupssm; with a semi-colon" do + input = "⩐" + output = [["Character", "⩐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cdot; with a semi-colon" do + input = "ċ" + output = [["Character", "ċ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cedil without a semi-colon" do + input = "¸" + output = [["Character", "¸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cedil; with a semi-colon" do + input = "¸" + output = [["Character", "¸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cemptyv; with a semi-colon" do + input = "⦲" + output = [["Character", "⦲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cent without a semi-colon" do + input = "¢" + output = [["Character", "¢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cent; with a semi-colon" do + input = "¢" + output = [["Character", "¢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: centerdot; with a semi-colon" do + input = "·" + output = [["Character", "·"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cfr; with a semi-colon" do + input = "𝔠" + output = [["Character", "𝔠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: chcy; with a semi-colon" do + input = "ч" + output = [["Character", "ч"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: check; with a semi-colon" do + input = "✓" + output = [["Character", "✓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: checkmark; with a semi-colon" do + input = "✓" + output = [["Character", "✓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: chi; with a semi-colon" do + input = "χ" + output = [["Character", "χ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cir; with a semi-colon" do + input = "○" + output = [["Character", "○"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cirE; with a semi-colon" do + input = "⧃" + output = [["Character", "⧃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circ; with a semi-colon" do + input = "ˆ" + output = [["Character", "ˆ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circeq; with a semi-colon" do + input = "≗" + output = [["Character", "≗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circlearrowleft; with a semi-colon" do + input = "↺" + output = [["Character", "↺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circlearrowright; with a semi-colon" do + input = "↻" + output = [["Character", "↻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circledR; with a semi-colon" do + input = "®" + output = [["Character", "®"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circledS; with a semi-colon" do + input = "Ⓢ" + output = [["Character", "Ⓢ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circledast; with a semi-colon" do + input = "⊛" + output = [["Character", "⊛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Named entity: circledcirc; with a semi-colon" do + input = "⊚" + output = [["Character", "⊚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: circleddash; with a semi-colon" do + input = "⊝" + output = [["Character", "⊝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cire; with a semi-colon" do + input = "≗" + output = [["Character", "≗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cirfnint; with a semi-colon" do + input = "⨐" + output = [["Character", "⨐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cirmid; with a semi-colon" do + input = "⫯" + output = [["Character", "⫯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cirscir; with a semi-colon" do + input = "⧂" + output = [["Character", "⧂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: clubs; with a semi-colon" do + input = "♣" + output = [["Character", "♣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: clubsuit; with a semi-colon" do + input = "♣" + output = [["Character", "♣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: colon; with a semi-colon" do + input = ":" + output = [["Character", ":"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: colone; with a semi-colon" do + input = "≔" + output = [["Character", "≔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: coloneq; with a semi-colon" do + input = "≔" + output = [["Character", "≔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: comma; with a semi-colon" do + input = "," + output = [["Character", ","]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: commat; with a semi-colon" do + input = "@" + output = [["Character", "@"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: comp; with a semi-colon" do + input = "∁" + output = [["Character", "∁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: compfn; with a semi-colon" do 
+ input = "∘" + output = [["Character", "∘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: complement; with a semi-colon" do + input = "∁" + output = [["Character", "∁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: complexes; with a semi-colon" do + input = "ℂ" + output = [["Character", "ℂ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cong; with a semi-colon" do + input = "≅" + output = [["Character", "≅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: congdot; with a semi-colon" do + input = "⩭" + output = [["Character", "⩭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: conint; with a semi-colon" do + input = "∮" + output = [["Character", "∮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: copf; with a semi-colon" do + input = "𝕔" + output = [["Character", "𝕔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: coprod; with a semi-colon" do + input = "∐" + output = [["Character", "∐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: copy without a semi-colon" do + input = "©" + output = [["Character", "©"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: copy; with a semi-colon" do + input = "©" + output = [["Character", "©"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: copysr; with a semi-colon" do + input = "℗" + output = [["Character", "℗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: crarr; with a semi-colon" do + input = "↵" + output = [["Character", "↵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cross; with a semi-colon" do + input = "✗" + output = [["Character", "✗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cscr; with a semi-colon" do + input = "𝒸" + output = [["Character", "𝒸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: csub; with a semi-colon" do + input = "⫏" + output = [["Character", "⫏"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: csube; with a semi-colon" do + input = "⫑" + output = [["Character", "⫑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: csup; with a semi-colon" do + input = "⫐" + output = [["Character", "⫐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: csupe; with a semi-colon" do + input = "⫒" + output = [["Character", "⫒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ctdot; with a semi-colon" do + input = "⋯" + output = [["Character", "⋯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cudarrl; with a semi-colon" do + input = "⤸" + output = [["Character", "⤸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cudarrr; with a semi-colon" do + input = "⤵" + output = [["Character", "⤵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cuepr; with a semi-colon" do + input = "⋞" + output = [["Character", "⋞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cuesc; with a semi-colon" do + input = "⋟" + output = [["Character", "⋟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cularr; with a semi-colon" do + input = "↶" + output = [["Character", "↶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cularrp; with a semi-colon" do + input = "⤽" + output = [["Character", "⤽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cup; with a semi-colon" do + input = "∪" + output = [["Character", "∪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cupbrcap; with a semi-colon" do + input = "⩈" + output = [["Character", "⩈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cupcap; with a semi-colon" do + input = "⩆" + output = [["Character", "⩆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cupcup; with a semi-colon" do + input = "⩊" + output = [["Character", "⩊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: cupdot; with a semi-colon" do + input = "⊍" + output = [["Character", "⊍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cupor; with a semi-colon" do + input = "⩅" + output = [["Character", "⩅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cups; with a semi-colon" do + input = "∪︀" + output = [["Character", "∪︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curarr; with a semi-colon" do + input = "↷" + output = [["Character", "↷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curarrm; with a semi-colon" do + input = "⤼" + output = [["Character", "⤼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curlyeqprec; with a semi-colon" do + input = "⋞" + output = [["Character", "⋞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curlyeqsucc; with a semi-colon" do + input = "⋟" + output = [["Character", "⋟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curlyvee; with a semi-colon" do + input = "⋎" + output = [["Character", "⋎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curlywedge; with a semi-colon" do + input = "⋏" + output = [["Character", "⋏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curren without a semi-colon" do + input = "¤" + output = [["Character", "¤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curren; with a semi-colon" do + input = "¤" + output = [["Character", "¤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curvearrowleft; with a semi-colon" do + input = "↶" + output = [["Character", "↶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: curvearrowright; with a semi-colon" do + input = "↷" + output = [["Character", "↷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cuvee; with a semi-colon" do + input = "⋎" + output = [["Character", "⋎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cuwed; with a semi-colon" do + input = "⋏" + output = [["Character", 
"⋏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cwconint; with a semi-colon" do + input = "∲" + output = [["Character", "∲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cwint; with a semi-colon" do + input = "∱" + output = [["Character", "∱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: cylcty; with a semi-colon" do + input = "⌭" + output = [["Character", "⌭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dArr; with a semi-colon" do + input = "⇓" + output = [["Character", "⇓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dHar; with a semi-colon" do + input = "⥥" + output = [["Character", "⥥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dagger; with a semi-colon" do + input = "†" + output = [["Character", "†"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part2_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part2_test.exs new file mode 100644 index 00000000..613a2f98 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part2_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart2Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: DownLeftVectorBar without a semi-colon" do + input = "&DownLeftVectorBar" + output = [["Character", "&DownLeftVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownRightTeeVector without a semi-colon" do + input = "&DownRightTeeVector" + output = [["Character", "&DownRightTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownRightVector without a semi-colon" do + input = "&DownRightVector" + output = [["Character", "&DownRightVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownRightVectorBar without a semi-colon" do + input = "&DownRightVectorBar" + output = [["Character", "&DownRightVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownTee without a semi-colon" do + input = "&DownTee" + output = [["Character", "&DownTee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: DownTeeArrow without a semi-colon" do + input = "&DownTeeArrow" + output = [["Character", "&DownTeeArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Downarrow without a semi-colon" do + input = "&Downarrow" + output = [["Character", "&Downarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dscr without a semi-colon" do + input = "&Dscr" + output = [["Character", "&Dscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Dstrok without a semi-colon" do + input = "&Dstrok" + output = [["Character", "&Dstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ENG without a semi-colon" do + input = "&ENG" + output = [["Character", "&ENG"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ecaron without a semi-colon" do + input = "&Ecaron" + output = [["Character", "&Ecaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ecy without a semi-colon" do + input = "&Ecy" + output = [["Character", "&Ecy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Edot without a semi-colon" do + input = "&Edot" + output = [["Character", "&Edot"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Efr without a semi-colon" do + input = "&Efr" + output = [["Character", "&Efr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Element without a semi-colon" do + input = "&Element" + output = [["Character", "&Element"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Emacr without a semi-colon" do + input = "&Emacr" + output = [["Character", "&Emacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: EmptySmallSquare without a semi-colon" do + input = "&EmptySmallSquare" + output = [["Character", "&EmptySmallSquare"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: EmptyVerySmallSquare without a semi-colon" do + input = "&EmptyVerySmallSquare" + output = [["Character", "&EmptyVerySmallSquare"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Eogon without a semi-colon" do + input = "&Eogon" + output = [["Character", "&Eogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Eopf without a semi-colon" do + input = "&Eopf" + output = [["Character", "&Eopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Epsilon without a semi-colon" do + input = "&Epsilon" + output = [["Character", "&Epsilon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Equal without a semi-colon" do + input = "&Equal" + output = [["Character", "&Equal"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: EqualTilde without a semi-colon" do + input = "&EqualTilde" + output = [["Character", "&EqualTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Equilibrium without a semi-colon" do + input = "&Equilibrium" + output = [["Character", "&Equilibrium"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Escr without a semi-colon" do + input = "&Escr" + output = [["Character", "&Escr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Esim without a semi-colon" do + input = "&Esim" + output = [["Character", "&Esim"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Eta without a semi-colon" do + input = "&Eta" + output = [["Character", "&Eta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Exists without a semi-colon" do + input = "&Exists" + output = [["Character", "&Exists"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ExponentialE without a semi-colon" do + input = "&ExponentialE" + output = [["Character", "&ExponentialE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Fcy without a semi-colon" do + input = "&Fcy" + output = [["Character", "&Fcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ffr without a semi-colon" do + input = "&Ffr" + output = [["Character", "&Ffr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: FilledSmallSquare without a semi-colon" do + input = "&FilledSmallSquare" + output = [["Character", "&FilledSmallSquare"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: FilledVerySmallSquare without a semi-colon" do + input = "&FilledVerySmallSquare" + output = [["Character", "&FilledVerySmallSquare"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Fopf without a semi-colon" do + input = "&Fopf" + output = [["Character", "&Fopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ForAll without a semi-colon" do + input = "&ForAll" + output = [["Character", "&ForAll"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Fouriertrf without a semi-colon" do + input = "&Fouriertrf" + output = [["Character", "&Fouriertrf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Fscr without a semi-colon" do + input = "&Fscr" + output = [["Character", "&Fscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: GJcy without a semi-colon" do + input = "&GJcy" + output = [["Character", "&GJcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gamma without a semi-colon" do + input = "&Gamma" + output = [["Character", "&Gamma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Bad named entity: Gammad without a semi-colon" do + input = "&Gammad" + output = [["Character", "&Gammad"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gbreve without a semi-colon" do + input = "&Gbreve" + output = [["Character", "&Gbreve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gcedil without a semi-colon" do + input = "&Gcedil" + output = [["Character", "&Gcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gcirc without a semi-colon" do + input = "&Gcirc" + output = [["Character", "&Gcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gcy without a semi-colon" do + input = "&Gcy" + output = [["Character", "&Gcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gdot without a semi-colon" do + input = "&Gdot" + output = [["Character", "&Gdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gfr without a semi-colon" do + input = "&Gfr" + output = [["Character", "&Gfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gg without a semi-colon" do + input = "&Gg" + output = [["Character", "&Gg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gopf without a semi-colon" do + input = "&Gopf" + output = [["Character", "&Gopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: GreaterEqual without a semi-colon" do + input = "&GreaterEqual" + output = [["Character", "&GreaterEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: GreaterEqualLess without a semi-colon" do + input = "&GreaterEqualLess" + output = [["Character", "&GreaterEqualLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: GreaterFullEqual without a semi-colon" do + input = "&GreaterFullEqual" + output = [["Character", "&GreaterFullEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: GreaterGreater without a semi-colon" do + input = "&GreaterGreater" + output = [["Character", "&GreaterGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named 
entity: GreaterLess without a semi-colon" do + input = "&GreaterLess" + output = [["Character", "&GreaterLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: GreaterSlantEqual without a semi-colon" do + input = "&GreaterSlantEqual" + output = [["Character", "&GreaterSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: GreaterTilde without a semi-colon" do + input = "&GreaterTilde" + output = [["Character", "&GreaterTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gscr without a semi-colon" do + input = "&Gscr" + output = [["Character", "&Gscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Gt without a semi-colon" do + input = "&Gt" + output = [["Character", "&Gt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: HARDcy without a semi-colon" do + input = "&HARDcy" + output = [["Character", "&HARDcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Hacek without a semi-colon" do + input = "&Hacek" + output = [["Character", "&Hacek"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Hat without a semi-colon" do + input = "&Hat" + output = [["Character", "&Hat"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Hcirc without a semi-colon" do + input = "&Hcirc" + output = [["Character", "&Hcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Hfr without a semi-colon" do + input = "&Hfr" + output = [["Character", "&Hfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: HilbertSpace without a semi-colon" do + input = "&HilbertSpace" + output = [["Character", "&HilbertSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Hopf without a semi-colon" do + input = "&Hopf" + output = [["Character", "&Hopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: HorizontalLine without a semi-colon" do + input = "&HorizontalLine" + output = [["Character", "&HorizontalLine"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Hscr without a semi-colon" do + input = "&Hscr" + 
output = [["Character", "&Hscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Hstrok without a semi-colon" do + input = "&Hstrok" + output = [["Character", "&Hstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: HumpDownHump without a semi-colon" do + input = "&HumpDownHump" + output = [["Character", "&HumpDownHump"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: HumpEqual without a semi-colon" do + input = "&HumpEqual" + output = [["Character", "&HumpEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: IEcy without a semi-colon" do + input = "&IEcy" + output = [["Character", "&IEcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: IJlig without a semi-colon" do + input = "&IJlig" + output = [["Character", "&IJlig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: IOcy without a semi-colon" do + input = "&IOcy" + output = [["Character", "&IOcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Icy without a semi-colon" do + input = "&Icy" + output = [["Character", "&Icy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Idot without a semi-colon" do + input = "&Idot" + output = [["Character", "&Idot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ifr without a semi-colon" do + input = "&Ifr" + output = [["Character", "&Ifr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Im without a semi-colon" do + input = "&Im" + output = [["Character", "&Im"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Imacr without a semi-colon" do + input = "&Imacr" + output = [["Character", "&Imacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ImaginaryI without a semi-colon" do + input = "&ImaginaryI" + output = [["Character", "&ImaginaryI"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Implies without a semi-colon" do + input = "&Implies" + output = [["Character", "&Implies"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Int without a semi-colon" do + input = "&Int" + output = [["Character", "&Int"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Integral without a semi-colon" do + input = "&Integral" + output = [["Character", "&Integral"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Intersection without a semi-colon" do + input = "&Intersection" + output = [["Character", "&Intersection"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: InvisibleComma without a semi-colon" do + input = "&InvisibleComma" + output = [["Character", "&InvisibleComma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: InvisibleTimes without a semi-colon" do + input = "&InvisibleTimes" + output = [["Character", "&InvisibleTimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Iogon without a semi-colon" do + input = "&Iogon" + output = [["Character", "&Iogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Iopf without a semi-colon" do + input = "&Iopf" + output = [["Character", "&Iopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Iota without a semi-colon" do + input = "&Iota" + output = [["Character", "&Iota"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Iscr without a semi-colon" do + input = "&Iscr" + output = [["Character", "&Iscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Itilde without a semi-colon" do + input = "&Itilde" + output = [["Character", "&Itilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Iukcy without a semi-colon" do + input = "&Iukcy" + output = [["Character", "&Iukcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Jcirc without a semi-colon" do + input = "&Jcirc" + output = [["Character", "&Jcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Jcy without a semi-colon" do + input = "&Jcy" + output = [["Character", "&Jcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Jfr without a 
semi-colon" do + input = "&Jfr" + output = [["Character", "&Jfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Jopf without a semi-colon" do + input = "&Jopf" + output = [["Character", "&Jopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Jscr without a semi-colon" do + input = "&Jscr" + output = [["Character", "&Jscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Jsercy without a semi-colon" do + input = "&Jsercy" + output = [["Character", "&Jsercy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Jukcy without a semi-colon" do + input = "&Jukcy" + output = [["Character", "&Jukcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: KHcy without a semi-colon" do + input = "&KHcy" + output = [["Character", "&KHcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: KJcy without a semi-colon" do + input = "&KJcy" + output = [["Character", "&KJcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Kappa without a semi-colon" do + input = "&Kappa" + output = [["Character", "&Kappa"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part30_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part30_test.exs new file mode 100644 index 00000000..80036071 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part30_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart30Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: daleth; with a semi-colon" do + input = "ℸ" + output = [["Character", "ℸ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: darr; with a semi-colon" do + input = "↓" + output = [["Character", "↓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dash; with a semi-colon" do + input = "‐" + output = [["Character", "‐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dashv; with a semi-colon" do + input = "⊣" + output = [["Character", "⊣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dbkarow; with a semi-colon" do + input = "⤏" + output = [["Character", "⤏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dblac; with a semi-colon" do + input = "˝" + output = [["Character", "˝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dcaron; with a semi-colon" do + input = "ď" + output = [["Character", "ď"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dcy; with a semi-colon" do + input = "д" + output = [["Character", "д"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dd; with a semi-colon" do + input = "ⅆ" + output = [["Character", "ⅆ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ddagger; with a semi-colon" do + input = "‡" + output = [["Character", "‡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ddarr; with a semi-colon" do + input = "⇊" + output = [["Character", "⇊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ddotseq; with a semi-colon" do + input = "⩷" + output = [["Character", "⩷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: deg without a semi-colon" do + input = "°" + output = [["Character", "°"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: deg; with a semi-colon" do + input = "°" + output = [["Character", "°"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: delta; with a semi-colon" do + input = "δ" + output = [["Character", "δ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: demptyv; with a semi-colon" do + input = "⦱" + output = [["Character", "⦱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dfisht; with a semi-colon" do + input = "⥿" + output = [["Character", "⥿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dfr; with a semi-colon" do + input = "𝔡" + output = [["Character", "𝔡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dharl; with a semi-colon" do + input = "⇃" + output = [["Character", "⇃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dharr; with a semi-colon" do + input = "⇂" + output = [["Character", "⇂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: diam; with a semi-colon" do + input = "⋄" + output = [["Character", "⋄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: diamond; with a semi-colon" do + input = "⋄" + output = [["Character", "⋄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: diamondsuit; with a semi-colon" do + input = "♦" + output = [["Character", "♦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: diams; with a semi-colon" do + input = "♦" + output = [["Character", "♦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: die; with a semi-colon" do + input = "¨" + output = [["Character", "¨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: digamma; with a semi-colon" do + input = "ϝ" + output = [["Character", "ϝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: disin; with a semi-colon" do + input = "⋲" + output = [["Character", "⋲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: div; with a semi-colon" do + input = "÷" + output = [["Character", "÷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: divide without a semi-colon" do + input = "÷" + output = [["Character", "÷"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: divide; with a semi-colon" do + input = "÷" + output = [["Character", "÷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: divideontimes; with a semi-colon" do + input = "⋇" + output = [["Character", "⋇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: divonx; with a semi-colon" do + input = "⋇" + output = [["Character", "⋇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: djcy; with a semi-colon" do + input = "ђ" + output = [["Character", "ђ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dlcorn; with a semi-colon" do + input = "⌞" + output = [["Character", "⌞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dlcrop; with a semi-colon" do + input = "⌍" + output = [["Character", "⌍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dollar; with a semi-colon" do + input = "$" + output = [["Character", "$"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dopf; with a semi-colon" do + input = "𝕕" + output = [["Character", "𝕕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dot; with a semi-colon" do + input = "˙" + output = [["Character", "˙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: doteq; with a semi-colon" do + input = "≐" + output = [["Character", "≐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: doteqdot; with a semi-colon" do + input = "≑" + output = [["Character", "≑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dotminus; with a semi-colon" do + input = "∸" + output = [["Character", "∸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dotplus; with a semi-colon" do + input = "∔" + output = [["Character", "∔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dotsquare; with a semi-colon" do + input = "⊡" + output = [["Character", "⊡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output 
+ end + + test "tokenize/1 Named entity: doublebarwedge; with a semi-colon" do + input = "⌆" + output = [["Character", "⌆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: downarrow; with a semi-colon" do + input = "↓" + output = [["Character", "↓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: downdownarrows; with a semi-colon" do + input = "⇊" + output = [["Character", "⇊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: downharpoonleft; with a semi-colon" do + input = "⇃" + output = [["Character", "⇃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: downharpoonright; with a semi-colon" do + input = "⇂" + output = [["Character", "⇂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: drbkarow; with a semi-colon" do + input = "⤐" + output = [["Character", "⤐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: drcorn; with a semi-colon" do + input = "⌟" + output = [["Character", "⌟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: drcrop; with a semi-colon" do + input = "⌌" + output = [["Character", "⌌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dscr; with a semi-colon" do + input = "𝒹" + output = [["Character", "𝒹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dscy; with a semi-colon" do + input = "ѕ" + output = [["Character", "ѕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dsol; with a semi-colon" do + input = "⧶" + output = [["Character", "⧶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dstrok; with a semi-colon" do + input = "đ" + output = [["Character", "đ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dtdot; with a semi-colon" do + input = "⋱" + output = [["Character", "⋱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dtri; with a semi-colon" do + input = "▿" + output = [["Character", "▿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dtrif; with a semi-colon" do + input = 
"▾" + output = [["Character", "▾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: duarr; with a semi-colon" do + input = "⇵" + output = [["Character", "⇵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: duhar; with a semi-colon" do + input = "⥯" + output = [["Character", "⥯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dwangle; with a semi-colon" do + input = "⦦" + output = [["Character", "⦦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dzcy; with a semi-colon" do + input = "џ" + output = [["Character", "џ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: dzigrarr; with a semi-colon" do + input = "⟿" + output = [["Character", "⟿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eDDot; with a semi-colon" do + input = "⩷" + output = [["Character", "⩷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eDot; with a semi-colon" do + input = "≑" + output = [["Character", "≑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eacute without a semi-colon" do + input = "é" + output = [["Character", "é"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eacute; with a semi-colon" do + input = "é" + output = [["Character", "é"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: easter; with a semi-colon" do + input = "⩮" + output = [["Character", "⩮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ecaron; with a semi-colon" do + input = "ě" + output = [["Character", "ě"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ecir; with a semi-colon" do + input = "≖" + output = [["Character", "≖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ecirc without a semi-colon" do + input = "ê" + output = [["Character", "ê"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ecirc; with a semi-colon" do + input = "ê" + output = [["Character", "ê"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ecolon; with a semi-colon" do + input = "≕" + output = [["Character", "≕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ecy; with a semi-colon" do + input = "э" + output = [["Character", "э"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: edot; with a semi-colon" do + input = "ė" + output = [["Character", "ė"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ee; with a semi-colon" do + input = "ⅇ" + output = [["Character", "ⅇ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: efDot; with a semi-colon" do + input = "≒" + output = [["Character", "≒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: efr; with a semi-colon" do + input = "𝔢" + output = [["Character", "𝔢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eg; with a semi-colon" do + input = "⪚" + output = [["Character", "⪚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: egrave without a semi-colon" do + input = "è" + output = [["Character", "è"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: egrave; with a semi-colon" do + input = "è" + output = [["Character", "è"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: egs; with a semi-colon" do + input = "⪖" + output = [["Character", "⪖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: egsdot; with a semi-colon" do + input = "⪘" + output = [["Character", "⪘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: el; with a semi-colon" do + input = "⪙" + output = [["Character", "⪙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: elinters; with a semi-colon" do + input = "⏧" + output = [["Character", "⏧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ell; with a semi-colon" do + input = "ℓ" + output = [["Character", "ℓ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: els; with a 
semi-colon" do + input = "⪕" + output = [["Character", "⪕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: elsdot; with a semi-colon" do + input = "⪗" + output = [["Character", "⪗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: emacr; with a semi-colon" do + input = "ē" + output = [["Character", "ē"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: empty; with a semi-colon" do + input = "∅" + output = [["Character", "∅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: emptyset; with a semi-colon" do + input = "∅" + output = [["Character", "∅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: emptyv; with a semi-colon" do + input = "∅" + output = [["Character", "∅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: emsp13; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: emsp14; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: emsp; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eng; with a semi-colon" do + input = "ŋ" + output = [["Character", "ŋ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ensp; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eogon; with a semi-colon" do + input = "ę" + output = [["Character", "ę"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eopf; with a semi-colon" do + input = "𝕖" + output = [["Character", "𝕖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: epar; with a semi-colon" do + input = "⋕" + output = [["Character", "⋕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part31_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part31_test.exs new file mode 100644 
index 00000000..c61e7289 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part31_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart31Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: eparsl; with a semi-colon" do + input = "⧣" + output = [["Character", "⧣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eplus; with a semi-colon" do + input = "⩱" + output = [["Character", "⩱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: epsi; with a semi-colon" do + input = "ε" + output = [["Character", "ε"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: epsilon; with a semi-colon" do + input = "ε" + output = [["Character", "ε"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: epsiv; with a semi-colon" do + input = "ϵ" + output = [["Character", "ϵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eqcirc; with a semi-colon" do + input = "≖" + output = [["Character", "≖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eqcolon; with a semi-colon" do + input = "≕" + output = [["Character", "≕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eqsim; with a semi-colon" do + input = "≂" + output = [["Character", "≂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eqslantgtr; with a semi-colon" do + input = "⪖" + output = [["Character", "⪖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eqslantless; with a semi-colon" do + input = "⪕" + output = [["Character", "⪕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: equals; with a semi-colon" do + input = "=" + output = [["Character", "="]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: equest; with a semi-colon" do + input = "≟" + output = [["Character", "≟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: equiv; with a semi-colon" do + input = "≡" + output = [["Character", "≡"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: equivDD; with a semi-colon" do + input = "⩸" + output = [["Character", "⩸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eqvparsl; with a semi-colon" do + input = "⧥" + output = [["Character", "⧥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: erDot; with a semi-colon" do + input = "≓" + output = [["Character", "≓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: erarr; with a semi-colon" do + input = "⥱" + output = [["Character", "⥱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: escr; with a semi-colon" do + input = "ℯ" + output = [["Character", "ℯ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: esdot; with a semi-colon" do + input = "≐" + output = [["Character", "≐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: esim; with a semi-colon" do + input = "≂" + output = [["Character", "≂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eta; with a semi-colon" do + input = "η" + output = [["Character", "η"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eth without a semi-colon" do + input = "ð" + output = [["Character", "ð"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: eth; with a semi-colon" do + input = "ð" + output = [["Character", "ð"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: euml without a semi-colon" do + input = "ë" + output = [["Character", "ë"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: euml; with a semi-colon" do + input = "ë" + output = [["Character", "ë"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: euro; with a semi-colon" do + input = "€" + output = [["Character", "€"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: excl; with a semi-colon" do + input = "!" 
+ output = [["Character", "!"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: exist; with a semi-colon" do + input = "∃" + output = [["Character", "∃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: expectation; with a semi-colon" do + input = "ℰ" + output = [["Character", "ℰ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: exponentiale; with a semi-colon" do + input = "ⅇ" + output = [["Character", "ⅇ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fallingdotseq; with a semi-colon" do + input = "≒" + output = [["Character", "≒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fcy; with a semi-colon" do + input = "ф" + output = [["Character", "ф"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: female; with a semi-colon" do + input = "♀" + output = [["Character", "♀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ffilig; with a semi-colon" do + input = "ffi" + output = [["Character", "ffi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fflig; with a semi-colon" do + input = "ff" + output = [["Character", "ff"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ffllig; with a semi-colon" do + input = "ffl" + output = [["Character", "ffl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ffr; with a semi-colon" do + input = "𝔣" + output = [["Character", "𝔣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: filig; with a semi-colon" do + input = "fi" + output = [["Character", "fi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fjlig; with a semi-colon" do + input = "fj" + output = [["Character", "fj"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: flat; with a semi-colon" do + input = "♭" + output = [["Character", "♭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fllig; with a semi-colon" do + input = "fl" + output = [["Character", "fl"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fltns; with a semi-colon" do + input = "▱" + output = [["Character", "▱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fnof; with a semi-colon" do + input = "ƒ" + output = [["Character", "ƒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fopf; with a semi-colon" do + input = "𝕗" + output = [["Character", "𝕗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: forall; with a semi-colon" do + input = "∀" + output = [["Character", "∀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fork; with a semi-colon" do + input = "⋔" + output = [["Character", "⋔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: forkv; with a semi-colon" do + input = "⫙" + output = [["Character", "⫙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fpartint; with a semi-colon" do + input = "⨍" + output = [["Character", "⨍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac12 without a semi-colon" do + input = "½" + output = [["Character", "½"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac12; with a semi-colon" do + input = "½" + output = [["Character", "½"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac13; with a semi-colon" do + input = "⅓" + output = [["Character", "⅓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac14 without a semi-colon" do + input = "¼" + output = [["Character", "¼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac14; with a semi-colon" do + input = "¼" + output = [["Character", "¼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac15; with a semi-colon" do + input = "⅕" + output = [["Character", "⅕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac16; with a semi-colon" do + input = "⅙" + output = [["Character", "⅙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: frac18; with a semi-colon" do + input = "⅛" + output = [["Character", "⅛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac23; with a semi-colon" do + input = "⅔" + output = [["Character", "⅔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac25; with a semi-colon" do + input = "⅖" + output = [["Character", "⅖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac34 without a semi-colon" do + input = "¾" + output = [["Character", "¾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac34; with a semi-colon" do + input = "¾" + output = [["Character", "¾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac35; with a semi-colon" do + input = "⅗" + output = [["Character", "⅗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac38; with a semi-colon" do + input = "⅜" + output = [["Character", "⅜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac45; with a semi-colon" do + input = "⅘" + output = [["Character", "⅘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac56; with a semi-colon" do + input = "⅚" + output = [["Character", "⅚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac58; with a semi-colon" do + input = "⅝" + output = [["Character", "⅝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frac78; with a semi-colon" do + input = "⅞" + output = [["Character", "⅞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frasl; with a semi-colon" do + input = "⁄" + output = [["Character", "⁄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: frown; with a semi-colon" do + input = "⌢" + output = [["Character", "⌢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: fscr; with a semi-colon" do + input = "𝒻" + output = [["Character", "𝒻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gE; with a semi-colon" do + input = "≧" + output = [["Character", "≧"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gEl; with a semi-colon" do + input = "⪌" + output = [["Character", "⪌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gacute; with a semi-colon" do + input = "ǵ" + output = [["Character", "ǵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gamma; with a semi-colon" do + input = "γ" + output = [["Character", "γ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gammad; with a semi-colon" do + input = "ϝ" + output = [["Character", "ϝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gap; with a semi-colon" do + input = "⪆" + output = [["Character", "⪆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gbreve; with a semi-colon" do + input = "ğ" + output = [["Character", "ğ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gcirc; with a semi-colon" do + input = "ĝ" + output = [["Character", "ĝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gcy; with a semi-colon" do + input = "г" + output = [["Character", "г"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gdot; with a semi-colon" do + input = "ġ" + output = [["Character", "ġ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ge; with a semi-colon" do + input = "≥" + output = [["Character", "≥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gel; with a semi-colon" do + input = "⋛" + output = [["Character", "⋛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: geq; with a semi-colon" do + input = "≥" + output = [["Character", "≥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: geqq; with a semi-colon" do + input = "≧" + output = [["Character", "≧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: geqslant; with a semi-colon" do + input = "⩾" + output = [["Character", "⩾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Named entity: ges; with a semi-colon" do + input = "⩾" + output = [["Character", "⩾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gescc; with a semi-colon" do + input = "⪩" + output = [["Character", "⪩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gesdot; with a semi-colon" do + input = "⪀" + output = [["Character", "⪀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gesdoto; with a semi-colon" do + input = "⪂" + output = [["Character", "⪂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gesdotol; with a semi-colon" do + input = "⪄" + output = [["Character", "⪄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gesl; with a semi-colon" do + input = "⋛︀" + output = [["Character", "⋛︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gesles; with a semi-colon" do + input = "⪔" + output = [["Character", "⪔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gfr; with a semi-colon" do + input = "𝔤" + output = [["Character", "𝔤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gg; with a semi-colon" do + input = "≫" + output = [["Character", "≫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ggg; with a semi-colon" do + input = "⋙" + output = [["Character", "⋙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gimel; with a semi-colon" do + input = "ℷ" + output = [["Character", "ℷ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gjcy; with a semi-colon" do + input = "ѓ" + output = [["Character", "ѓ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gl; with a semi-colon" do + input = "≷" + output = [["Character", "≷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: glE; with a semi-colon" do + input = "⪒" + output = [["Character", "⪒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gla; with a semi-colon" do + input = "⪥" + output = [["Character", "⪥"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: glj; with a semi-colon" do + input = "⪤" + output = [["Character", "⪤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part32_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part32_test.exs new file mode 100644 index 00000000..5b673c7b --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part32_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart32Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: gnE; with a semi-colon" do + input = "≩" + output = [["Character", "≩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gnap; with a semi-colon" do + input = "⪊" + output = [["Character", "⪊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gnapprox; with a semi-colon" do + input = "⪊" + output = [["Character", "⪊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gne; with a semi-colon" do + input = "⪈" + output = [["Character", "⪈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gneq; with a semi-colon" do + input = "⪈" + output = [["Character", "⪈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gneqq; with a semi-colon" do + input = "≩" + output = [["Character", "≩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gnsim; with a semi-colon" do + input = "⋧" + output = [["Character", "⋧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gopf; with a semi-colon" do + input = "𝕘" + output = [["Character", "𝕘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: grave; with a semi-colon" do + input = "`" + output = [["Character", "`"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gscr; with a semi-colon" do + input = "ℊ" + output = [["Character", "ℊ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gsim; with a semi-colon" do + input = "≳" + output = [["Character", "≳"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gsime; with a semi-colon" do + input = "⪎" + output = [["Character", "⪎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gsiml; with a semi-colon" do + input = "⪐" + output = [["Character", "⪐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gt without a semi-colon" do + input = ">" + output = [["Character", ">"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gt; with a semi-colon" do + input = ">" + output = [["Character", ">"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtcc; with a semi-colon" do + input = "⪧" + output = [["Character", "⪧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtcir; with a semi-colon" do + input = "⩺" + output = [["Character", "⩺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtdot; with a semi-colon" do + input = "⋗" + output = [["Character", "⋗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtlPar; with a semi-colon" do + input = "⦕" + output = [["Character", "⦕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtquest; with a semi-colon" do + input = "⩼" + output = [["Character", "⩼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtrapprox; with a semi-colon" do + input = "⪆" + output = [["Character", "⪆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtrarr; with a semi-colon" do + input = "⥸" + output = [["Character", "⥸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtrdot; with a semi-colon" do + input = "⋗" + output = [["Character", "⋗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtreqless; with a semi-colon" do + input = "⋛" + output = [["Character", "⋛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtreqqless; with a semi-colon" do + input = "⪌" + output = [["Character", "⪌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: gtrless; with a semi-colon" do + input = "≷" + output = [["Character", "≷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gtrsim; with a semi-colon" do + input = "≳" + output = [["Character", "≳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gvertneqq; with a semi-colon" do + input = "≩︀" + output = [["Character", "≩︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: gvnE; with a semi-colon" do + input = "≩︀" + output = [["Character", "≩︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hArr; with a semi-colon" do + input = "⇔" + output = [["Character", "⇔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hairsp; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: half; with a semi-colon" do + input = "½" + output = [["Character", "½"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hamilt; with a semi-colon" do + input = "ℋ" + output = [["Character", "ℋ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hardcy; with a semi-colon" do + input = "ъ" + output = [["Character", "ъ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: harr; with a semi-colon" do + input = "↔" + output = [["Character", "↔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: harrcir; with a semi-colon" do + input = "⥈" + output = [["Character", "⥈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: harrw; with a semi-colon" do + input = "↭" + output = [["Character", "↭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hbar; with a semi-colon" do + input = "ℏ" + output = [["Character", "ℏ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hcirc; with a semi-colon" do + input = "ĥ" + output = [["Character", "ĥ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hearts; with a semi-colon" do + input = "♥" + output = [["Character", "♥"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: heartsuit; with a semi-colon" do + input = "♥" + output = [["Character", "♥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hellip; with a semi-colon" do + input = "…" + output = [["Character", "…"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hercon; with a semi-colon" do + input = "⊹" + output = [["Character", "⊹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hfr; with a semi-colon" do + input = "𝔥" + output = [["Character", "𝔥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hksearow; with a semi-colon" do + input = "⤥" + output = [["Character", "⤥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hkswarow; with a semi-colon" do + input = "⤦" + output = [["Character", "⤦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hoarr; with a semi-colon" do + input = "⇿" + output = [["Character", "⇿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: homtht; with a semi-colon" do + input = "∻" + output = [["Character", "∻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hookleftarrow; with a semi-colon" do + input = "↩" + output = [["Character", "↩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hookrightarrow; with a semi-colon" do + input = "↪" + output = [["Character", "↪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hopf; with a semi-colon" do + input = "𝕙" + output = [["Character", "𝕙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: horbar; with a semi-colon" do + input = "―" + output = [["Character", "―"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hscr; with a semi-colon" do + input = "𝒽" + output = [["Character", "𝒽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hslash; with a semi-colon" do + input = "ℏ" + output = [["Character", "ℏ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == 
output + end + + test "tokenize/1 Named entity: hstrok; with a semi-colon" do + input = "ħ" + output = [["Character", "ħ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hybull; with a semi-colon" do + input = "⁃" + output = [["Character", "⁃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: hyphen; with a semi-colon" do + input = "‐" + output = [["Character", "‐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iacute without a semi-colon" do + input = "í" + output = [["Character", "í"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iacute; with a semi-colon" do + input = "í" + output = [["Character", "í"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ic; with a semi-colon" do + input = "⁣" + output = [["Character", "⁣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: icirc without a semi-colon" do + input = "î" + output = [["Character", "î"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: icirc; with a semi-colon" do + input = "î" + output = [["Character", "î"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: icy; with a semi-colon" do + input = "и" + output = [["Character", "и"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iecy; with a semi-colon" do + input = "е" + output = [["Character", "е"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iexcl without a semi-colon" do + input = "¡" + output = [["Character", "¡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iexcl; with a semi-colon" do + input = "¡" + output = [["Character", "¡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iff; with a semi-colon" do + input = "⇔" + output = [["Character", "⇔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ifr; with a semi-colon" do + input = "𝔦" + output = [["Character", "𝔦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: igrave without a semi-colon" do + input = "ì" + output = [["Character", 
"ì"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: igrave; with a semi-colon" do + input = "ì" + output = [["Character", "ì"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ii; with a semi-colon" do + input = "ⅈ" + output = [["Character", "ⅈ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iiiint; with a semi-colon" do + input = "⨌" + output = [["Character", "⨌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iiint; with a semi-colon" do + input = "∭" + output = [["Character", "∭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iinfin; with a semi-colon" do + input = "⧜" + output = [["Character", "⧜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iiota; with a semi-colon" do + input = "℩" + output = [["Character", "℩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ijlig; with a semi-colon" do + input = "ij" + output = [["Character", "ij"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: imacr; with a semi-colon" do + input = "ī" + output = [["Character", "ī"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: image; with a semi-colon" do + input = "ℑ" + output = [["Character", "ℑ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: imagline; with a semi-colon" do + input = "ℐ" + output = [["Character", "ℐ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: imagpart; with a semi-colon" do + input = "ℑ" + output = [["Character", "ℑ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: imath; with a semi-colon" do + input = "ı" + output = [["Character", "ı"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: imof; with a semi-colon" do + input = "⊷" + output = [["Character", "⊷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: imped; with a semi-colon" do + input = "Ƶ" + output = [["Character", "Ƶ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: in; with a semi-colon" do + input = "∈" + output = [["Character", "∈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: incare; with a semi-colon" do + input = "℅" + output = [["Character", "℅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: infin; with a semi-colon" do + input = "∞" + output = [["Character", "∞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: infintie; with a semi-colon" do + input = "⧝" + output = [["Character", "⧝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: inodot; with a semi-colon" do + input = "ı" + output = [["Character", "ı"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: int; with a semi-colon" do + input = "∫" + output = [["Character", "∫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: intcal; with a semi-colon" do + input = "⊺" + output = [["Character", "⊺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: integers; with a semi-colon" do + input = "ℤ" + output = [["Character", "ℤ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: intercal; with a semi-colon" do + input = "⊺" + output = [["Character", "⊺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: intlarhk; with a semi-colon" do + input = "⨗" + output = [["Character", "⨗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: intprod; with a semi-colon" do + input = "⨼" + output = [["Character", "⨼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iocy; with a semi-colon" do + input = "ё" + output = [["Character", "ё"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iogon; with a semi-colon" do + input = "į" + output = [["Character", "į"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iopf; with a semi-colon" do + input = "𝕚" + output = [["Character", "𝕚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iota; with a semi-colon" do + input = "ι" + 
output = [["Character", "ι"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iprod; with a semi-colon" do + input = "⨼" + output = [["Character", "⨼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iquest without a semi-colon" do + input = "¿" + output = [["Character", "¿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part33_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part33_test.exs new file mode 100644 index 00000000..9da003f5 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part33_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart33Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: iquest; with a semi-colon" do + input = "¿" + output = [["Character", "¿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iscr; with a semi-colon" do + input = "𝒾" + output = [["Character", "𝒾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: isin; with a semi-colon" do + input = "∈" + output = [["Character", "∈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: isinE; with a semi-colon" do + input = "⋹" + output = [["Character", "⋹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: isindot; with a semi-colon" do + input = "⋵" + output = [["Character", "⋵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: isins; with a semi-colon" do + input = "⋴" + output = [["Character", "⋴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: isinsv; with a semi-colon" do + input = "⋳" + output = [["Character", "⋳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: isinv; with a semi-colon" do + input = "∈" + output = [["Character", "∈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: it; with a semi-colon" do + input = "⁢" + output = [["Character", "⁢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: itilde; with a semi-colon" do + input = "ĩ" + output = 
[["Character", "ĩ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iukcy; with a semi-colon" do + input = "і" + output = [["Character", "і"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iuml without a semi-colon" do + input = "ï" + output = [["Character", "ï"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: iuml; with a semi-colon" do + input = "ï" + output = [["Character", "ï"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jcirc; with a semi-colon" do + input = "ĵ" + output = [["Character", "ĵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jcy; with a semi-colon" do + input = "й" + output = [["Character", "й"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jfr; with a semi-colon" do + input = "𝔧" + output = [["Character", "𝔧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jmath; with a semi-colon" do + input = "ȷ" + output = [["Character", "ȷ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jopf; with a semi-colon" do + input = "𝕛" + output = [["Character", "𝕛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jscr; with a semi-colon" do + input = "𝒿" + output = [["Character", "𝒿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jsercy; with a semi-colon" do + input = "ј" + output = [["Character", "ј"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: jukcy; with a semi-colon" do + input = "є" + output = [["Character", "є"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kappa; with a semi-colon" do + input = "κ" + output = [["Character", "κ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kappav; with a semi-colon" do + input = "ϰ" + output = [["Character", "ϰ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kcedil; with a semi-colon" do + input = "ķ" + output = [["Character", "ķ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: kcy; with a semi-colon" do + input = "к" + output = [["Character", "к"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kfr; with a semi-colon" do + input = "𝔨" + output = [["Character", "𝔨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kgreen; with a semi-colon" do + input = "ĸ" + output = [["Character", "ĸ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: khcy; with a semi-colon" do + input = "х" + output = [["Character", "х"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kjcy; with a semi-colon" do + input = "ќ" + output = [["Character", "ќ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kopf; with a semi-colon" do + input = "𝕜" + output = [["Character", "𝕜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: kscr; with a semi-colon" do + input = "𝓀" + output = [["Character", "𝓀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lAarr; with a semi-colon" do + input = "⇚" + output = [["Character", "⇚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lArr; with a semi-colon" do + input = "⇐" + output = [["Character", "⇐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lAtail; with a semi-colon" do + input = "⤛" + output = [["Character", "⤛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lBarr; with a semi-colon" do + input = "⤎" + output = [["Character", "⤎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lE; with a semi-colon" do + input = "≦" + output = [["Character", "≦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lEg; with a semi-colon" do + input = "⪋" + output = [["Character", "⪋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lHar; with a semi-colon" do + input = "⥢" + output = [["Character", "⥢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lacute; with a semi-colon" do + input = "ĺ" + output = [["Character", 
"ĺ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: laemptyv; with a semi-colon" do + input = "⦴" + output = [["Character", "⦴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lagran; with a semi-colon" do + input = "ℒ" + output = [["Character", "ℒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lambda; with a semi-colon" do + input = "λ" + output = [["Character", "λ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lang; with a semi-colon" do + input = "⟨" + output = [["Character", "⟨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: langd; with a semi-colon" do + input = "⦑" + output = [["Character", "⦑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: langle; with a semi-colon" do + input = "⟨" + output = [["Character", "⟨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lap; with a semi-colon" do + input = "⪅" + output = [["Character", "⪅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: laquo without a semi-colon" do + input = "«" + output = [["Character", "«"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: laquo; with a semi-colon" do + input = "«" + output = [["Character", "«"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larr; with a semi-colon" do + input = "←" + output = [["Character", "←"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larrb; with a semi-colon" do + input = "⇤" + output = [["Character", "⇤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larrbfs; with a semi-colon" do + input = "⤟" + output = [["Character", "⤟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larrfs; with a semi-colon" do + input = "⤝" + output = [["Character", "⤝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larrhk; with a semi-colon" do + input = "↩" + output = [["Character", "↩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: larrlp; with a semi-colon" do + input = "↫" + output = [["Character", "↫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larrpl; with a semi-colon" do + input = "⤹" + output = [["Character", "⤹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larrsim; with a semi-colon" do + input = "⥳" + output = [["Character", "⥳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: larrtl; with a semi-colon" do + input = "↢" + output = [["Character", "↢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lat; with a semi-colon" do + input = "⪫" + output = [["Character", "⪫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: latail; with a semi-colon" do + input = "⤙" + output = [["Character", "⤙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: late; with a semi-colon" do + input = "⪭" + output = [["Character", "⪭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lates; with a semi-colon" do + input = "⪭︀" + output = [["Character", "⪭︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lbarr; with a semi-colon" do + input = "⤌" + output = [["Character", "⤌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lbbrk; with a semi-colon" do + input = "❲" + output = [["Character", "❲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lbrace; with a semi-colon" do + input = "{" + output = [["Character", "{"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lbrack; with a semi-colon" do + input = "[" + output = [["Character", "["]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lbrke; with a semi-colon" do + input = "⦋" + output = [["Character", "⦋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lbrksld; with a semi-colon" do + input = "⦏" + output = [["Character", "⦏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lbrkslu; with a semi-colon" do + input = "⦍" + 
output = [["Character", "⦍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lcaron; with a semi-colon" do + input = "ľ" + output = [["Character", "ľ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lcedil; with a semi-colon" do + input = "ļ" + output = [["Character", "ļ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lceil; with a semi-colon" do + input = "⌈" + output = [["Character", "⌈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lcub; with a semi-colon" do + input = "{" + output = [["Character", "{"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lcy; with a semi-colon" do + input = "л" + output = [["Character", "л"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ldca; with a semi-colon" do + input = "⤶" + output = [["Character", "⤶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ldquo; with a semi-colon" do + input = "“" + output = [["Character", "“"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ldquor; with a semi-colon" do + input = "„" + output = [["Character", "„"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ldrdhar; with a semi-colon" do + input = "⥧" + output = [["Character", "⥧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ldrushar; with a semi-colon" do + input = "⥋" + output = [["Character", "⥋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ldsh; with a semi-colon" do + input = "↲" + output = [["Character", "↲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: le; with a semi-colon" do + input = "≤" + output = [["Character", "≤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftarrow; with a semi-colon" do + input = "←" + output = [["Character", "←"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftarrowtail; with a semi-colon" do + input = "↢" + output = [["Character", "↢"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftharpoondown; with a semi-colon" do + input = "↽" + output = [["Character", "↽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftharpoonup; with a semi-colon" do + input = "↼" + output = [["Character", "↼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftleftarrows; with a semi-colon" do + input = "⇇" + output = [["Character", "⇇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftrightarrow; with a semi-colon" do + input = "↔" + output = [["Character", "↔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftrightarrows; with a semi-colon" do + input = "⇆" + output = [["Character", "⇆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftrightharpoons; with a semi-colon" do + input = "⇋" + output = [["Character", "⇋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftrightsquigarrow; with a semi-colon" do + input = "↭" + output = [["Character", "↭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leftthreetimes; with a semi-colon" do + input = "⋋" + output = [["Character", "⋋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leg; with a semi-colon" do + input = "⋚" + output = [["Character", "⋚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leq; with a semi-colon" do + input = "≤" + output = [["Character", "≤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leqq; with a semi-colon" do + input = "≦" + output = [["Character", "≦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: leqslant; with a semi-colon" do + input = "⩽" + output = [["Character", "⩽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: les; with a semi-colon" do + input = "⩽" + output = [["Character", "⩽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lescc; with a semi-colon" do + input = "⪨" + output = [["Character", "⪨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Named entity: lesdot; with a semi-colon" do + input = "⩿" + output = [["Character", "⩿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lesdoto; with a semi-colon" do + input = "⪁" + output = [["Character", "⪁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lesdotor; with a semi-colon" do + input = "⪃" + output = [["Character", "⪃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lesg; with a semi-colon" do + input = "⋚︀" + output = [["Character", "⋚︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part34_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part34_test.exs new file mode 100644 index 00000000..66d83207 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part34_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart34Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: lesges; with a semi-colon" do + input = "⪓" + output = [["Character", "⪓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lessapprox; with a semi-colon" do + input = "⪅" + output = [["Character", "⪅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lessdot; with a semi-colon" do + input = "⋖" + output = [["Character", "⋖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lesseqgtr; with a semi-colon" do + input = "⋚" + output = [["Character", "⋚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lesseqqgtr; with a semi-colon" do + input = "⪋" + output = [["Character", "⪋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lessgtr; with a semi-colon" do + input = "≶" + output = [["Character", "≶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lesssim; with a semi-colon" do + input = "≲" + output = [["Character", "≲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lfisht; with a semi-colon" do + input = "⥼" + output = [["Character", "⥼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() 
+ + assert result.tokens == output + end + + test "tokenize/1 Named entity: lfloor; with a semi-colon" do + input = "⌊" + output = [["Character", "⌊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lfr; with a semi-colon" do + input = "𝔩" + output = [["Character", "𝔩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lg; with a semi-colon" do + input = "≶" + output = [["Character", "≶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lgE; with a semi-colon" do + input = "⪑" + output = [["Character", "⪑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lhard; with a semi-colon" do + input = "↽" + output = [["Character", "↽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lharu; with a semi-colon" do + input = "↼" + output = [["Character", "↼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lharul; with a semi-colon" do + input = "⥪" + output = [["Character", "⥪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lhblk; with a semi-colon" do + input = "▄" + output = [["Character", "▄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ljcy; with a semi-colon" do + input = "љ" + output = [["Character", "љ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ll; with a semi-colon" do + input = "≪" + output = [["Character", "≪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: llarr; with a semi-colon" do + input = "⇇" + output = [["Character", "⇇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: llcorner; with a semi-colon" do + input = "⌞" + output = [["Character", "⌞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: llhard; with a semi-colon" do + input = "⥫" + output = [["Character", "⥫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lltri; with a semi-colon" do + input = "◺" + output = [["Character", "◺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lmidot; with a semi-colon" do + input = "ŀ" + output 
= [["Character", "ŀ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lmoust; with a semi-colon" do + input = "⎰" + output = [["Character", "⎰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lmoustache; with a semi-colon" do + input = "⎰" + output = [["Character", "⎰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lnE; with a semi-colon" do + input = "≨" + output = [["Character", "≨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lnap; with a semi-colon" do + input = "⪉" + output = [["Character", "⪉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lnapprox; with a semi-colon" do + input = "⪉" + output = [["Character", "⪉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lne; with a semi-colon" do + input = "⪇" + output = [["Character", "⪇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lneq; with a semi-colon" do + input = "⪇" + output = [["Character", "⪇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lneqq; with a semi-colon" do + input = "≨" + output = [["Character", "≨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lnsim; with a semi-colon" do + input = "⋦" + output = [["Character", "⋦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: loang; with a semi-colon" do + input = "⟬" + output = [["Character", "⟬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: loarr; with a semi-colon" do + input = "⇽" + output = [["Character", "⇽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lobrk; with a semi-colon" do + input = "⟦" + output = [["Character", "⟦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: longleftarrow; with a semi-colon" do + input = "⟵" + output = [["Character", "⟵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: longleftrightarrow; with a semi-colon" do + input = "⟷" + output = [["Character", "⟷"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: longmapsto; with a semi-colon" do + input = "⟼" + output = [["Character", "⟼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: longrightarrow; with a semi-colon" do + input = "⟶" + output = [["Character", "⟶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: looparrowleft; with a semi-colon" do + input = "↫" + output = [["Character", "↫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: looparrowright; with a semi-colon" do + input = "↬" + output = [["Character", "↬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lopar; with a semi-colon" do + input = "⦅" + output = [["Character", "⦅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lopf; with a semi-colon" do + input = "𝕝" + output = [["Character", "𝕝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: loplus; with a semi-colon" do + input = "⨭" + output = [["Character", "⨭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lotimes; with a semi-colon" do + input = "⨴" + output = [["Character", "⨴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lowast; with a semi-colon" do + input = "∗" + output = [["Character", "∗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lowbar; with a semi-colon" do + input = "_" + output = [["Character", "_"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: loz; with a semi-colon" do + input = "◊" + output = [["Character", "◊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lozenge; with a semi-colon" do + input = "◊" + output = [["Character", "◊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lozf; with a semi-colon" do + input = "⧫" + output = [["Character", "⧫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lpar; with a semi-colon" do + input = "(" + output = [["Character", "("]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: lparlt; with a semi-colon" do + input = "⦓" + output = [["Character", "⦓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lrarr; with a semi-colon" do + input = "⇆" + output = [["Character", "⇆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lrcorner; with a semi-colon" do + input = "⌟" + output = [["Character", "⌟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lrhar; with a semi-colon" do + input = "⇋" + output = [["Character", "⇋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lrhard; with a semi-colon" do + input = "⥭" + output = [["Character", "⥭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lrm; with a semi-colon" do + input = "‎" + output = [["Character", "‎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lrtri; with a semi-colon" do + input = "⊿" + output = [["Character", "⊿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsaquo; with a semi-colon" do + input = "‹" + output = [["Character", "‹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lscr; with a semi-colon" do + input = "𝓁" + output = [["Character", "𝓁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsh; with a semi-colon" do + input = "↰" + output = [["Character", "↰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsim; with a semi-colon" do + input = "≲" + output = [["Character", "≲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsime; with a semi-colon" do + input = "⪍" + output = [["Character", "⪍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsimg; with a semi-colon" do + input = "⪏" + output = [["Character", "⪏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsqb; with a semi-colon" do + input = "[" + output = [["Character", "["]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsquo; with a semi-colon" do + input = "‘" + output = [["Character", "‘"]] + + result = + input + 
|> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lsquor; with a semi-colon" do + input = "‚" + output = [["Character", "‚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lstrok; with a semi-colon" do + input = "ł" + output = [["Character", "ł"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lt without a semi-colon" do + input = "<" + output = [["Character", "<"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lt; with a semi-colon" do + input = "<" + output = [["Character", "<"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltcc; with a semi-colon" do + input = "⪦" + output = [["Character", "⪦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltcir; with a semi-colon" do + input = "⩹" + output = [["Character", "⩹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltdot; with a semi-colon" do + input = "⋖" + output = [["Character", "⋖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lthree; with a semi-colon" do + input = "⋋" + output = [["Character", "⋋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltimes; with a semi-colon" do + input = "⋉" + output = [["Character", "⋉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltlarr; with a semi-colon" do + input = "⥶" + output = [["Character", "⥶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltquest; with a semi-colon" do + input = "⩻" + output = [["Character", "⩻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltrPar; with a semi-colon" do + input = "⦖" + output = [["Character", "⦖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltri; with a semi-colon" do + input = "◃" + output = [["Character", "◃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ltrie; with a semi-colon" do + input = "⊴" + output = [["Character", "⊴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: ltrif; with a semi-colon" do + input = "◂" + output = [["Character", "◂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lurdshar; with a semi-colon" do + input = "⥊" + output = [["Character", "⥊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: luruhar; with a semi-colon" do + input = "⥦" + output = [["Character", "⥦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lvertneqq; with a semi-colon" do + input = "≨︀" + output = [["Character", "≨︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: lvnE; with a semi-colon" do + input = "≨︀" + output = [["Character", "≨︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mDDot; with a semi-colon" do + input = "∺" + output = [["Character", "∺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: macr without a semi-colon" do + input = "¯" + output = [["Character", "¯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: macr; with a semi-colon" do + input = "¯" + output = [["Character", "¯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: male; with a semi-colon" do + input = "♂" + output = [["Character", "♂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: malt; with a semi-colon" do + input = "✠" + output = [["Character", "✠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: maltese; with a semi-colon" do + input = "✠" + output = [["Character", "✠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: map; with a semi-colon" do + input = "↦" + output = [["Character", "↦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mapsto; with a semi-colon" do + input = "↦" + output = [["Character", "↦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mapstodown; with a semi-colon" do + input = "↧" + output = [["Character", "↧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mapstoleft; with a semi-colon" do + input = "↤" + output = [["Character", "↤"]] + 
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Named entity: mapstoup; with a semi-colon" do
+    input = "&mapstoup;"
+    output = [["Character", "↥"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Named entity: marker; with a semi-colon" do
+    input = "&marker;"
+    output = [["Character", "▮"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Named entity: mcomma; with a semi-colon" do
+    input = "&mcomma;"
+    output = [["Character", "⨩"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Named entity: mcy; with a semi-colon" do
+    input = "&mcy;"
+    output = [["Character", "м"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Named entity: mdash; with a semi-colon" do
+    input = "&mdash;"
+    output = [["Character", "—"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+end
diff --git a/test/floki/html/generated/tokenizer/namedEntities_part35_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part35_test.exs
new file mode 100644
index 00000000..2c84bbb8
--- /dev/null
+++ b/test/floki/html/generated/tokenizer/namedEntities_part35_test.exs
@@ -0,0 +1,1208 @@
+defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart35Test do
+  use ExUnit.Case, async: true
+
+  # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test".
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: measuredangle; with a semi-colon" do + input = "∡" + output = [["Character", "∡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mfr; with a semi-colon" do + input = "𝔪" + output = [["Character", "𝔪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mho; with a semi-colon" do + input = "℧" + output = [["Character", "℧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: micro without a semi-colon" do + input = "µ" + output = [["Character", "µ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: micro; with a semi-colon" do + input = "µ" + output = [["Character", "µ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mid; with a semi-colon" do + input = "∣" + output = [["Character", "∣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: midast; with a semi-colon" do + input = "*" + output = [["Character", "*"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: midcir; with a semi-colon" do + input = "⫰" + output = [["Character", "⫰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: middot without a semi-colon" do + input = "·" + output = [["Character", "·"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: middot; with a semi-colon" do + input = "·" + output = [["Character", "·"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: minus; with a semi-colon" do + input = "−" + output = [["Character", "−"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: minusb; with a semi-colon" do + input = "⊟" + output = [["Character", "⊟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: minusd; with a semi-colon" do + input = "∸" + output = [["Character", "∸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: minusdu; with a semi-colon" do + input = "⨪" + output = [["Character", "⨪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: mlcp; with a semi-colon" do + input = "⫛" + output = [["Character", "⫛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mldr; with a semi-colon" do + input = "…" + output = [["Character", "…"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mnplus; with a semi-colon" do + input = "∓" + output = [["Character", "∓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: models; with a semi-colon" do + input = "⊧" + output = [["Character", "⊧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mopf; with a semi-colon" do + input = "𝕞" + output = [["Character", "𝕞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mp; with a semi-colon" do + input = "∓" + output = [["Character", "∓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mscr; with a semi-colon" do + input = "𝓂" + output = [["Character", "𝓂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mstpos; with a semi-colon" do + input = "∾" + output = [["Character", "∾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mu; with a semi-colon" do + input = "μ" + output = [["Character", "μ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: multimap; with a semi-colon" do + input = "⊸" + output = [["Character", "⊸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: mumap; with a semi-colon" do + input = "⊸" + output = [["Character", "⊸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nGg; with a semi-colon" do + input = "⋙̸" + output = [["Character", "⋙̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nGt; with a semi-colon" do + input = "≫⃒" + output = [["Character", "≫⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nGtv; with a semi-colon" do + input = "≫̸" + output = [["Character", "≫̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nLeftarrow; with a semi-colon" do + input = "⇍" + output = [["Character", "⇍"]] + + result = + input 
+ |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nLeftrightarrow; with a semi-colon" do + input = "⇎" + output = [["Character", "⇎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nLl; with a semi-colon" do + input = "⋘̸" + output = [["Character", "⋘̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nLt; with a semi-colon" do + input = "≪⃒" + output = [["Character", "≪⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nLtv; with a semi-colon" do + input = "≪̸" + output = [["Character", "≪̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nRightarrow; with a semi-colon" do + input = "⇏" + output = [["Character", "⇏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nVDash; with a semi-colon" do + input = "⊯" + output = [["Character", "⊯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nVdash; with a semi-colon" do + input = "⊮" + output = [["Character", "⊮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nabla; with a semi-colon" do + input = "∇" + output = [["Character", "∇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nacute; with a semi-colon" do + input = "ń" + output = [["Character", "ń"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nang; with a semi-colon" do + input = "∠⃒" + output = [["Character", "∠⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nap; with a semi-colon" do + input = "≉" + output = [["Character", "≉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: napE; with a semi-colon" do + input = "⩰̸" + output = [["Character", "⩰̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: napid; with a semi-colon" do + input = "≋̸" + output = [["Character", "≋̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: napos; with a semi-colon" do + input = "ʼn" + output = [["Character", "ʼn"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == 
output + end + + test "tokenize/1 Named entity: napprox; with a semi-colon" do + input = "≉" + output = [["Character", "≉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: natur; with a semi-colon" do + input = "♮" + output = [["Character", "♮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: natural; with a semi-colon" do + input = "♮" + output = [["Character", "♮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: naturals; with a semi-colon" do + input = "ℕ" + output = [["Character", "ℕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nbsp without a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nbsp; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nbump; with a semi-colon" do + input = "≎̸" + output = [["Character", "≎̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nbumpe; with a semi-colon" do + input = "≏̸" + output = [["Character", "≏̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ncap; with a semi-colon" do + input = "⩃" + output = [["Character", "⩃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ncaron; with a semi-colon" do + input = "ň" + output = [["Character", "ň"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ncedil; with a semi-colon" do + input = "ņ" + output = [["Character", "ņ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ncong; with a semi-colon" do + input = "≇" + output = [["Character", "≇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ncongdot; with a semi-colon" do + input = "⩭̸" + output = [["Character", "⩭̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ncup; with a semi-colon" do + input = "⩂" + output = [["Character", "⩂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ncy; with a semi-colon" do + input = "н" + output = 
[["Character", "н"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ndash; with a semi-colon" do + input = "–" + output = [["Character", "–"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ne; with a semi-colon" do + input = "≠" + output = [["Character", "≠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: neArr; with a semi-colon" do + input = "⇗" + output = [["Character", "⇗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nearhk; with a semi-colon" do + input = "⤤" + output = [["Character", "⤤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nearr; with a semi-colon" do + input = "↗" + output = [["Character", "↗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nearrow; with a semi-colon" do + input = "↗" + output = [["Character", "↗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nedot; with a semi-colon" do + input = "≐̸" + output = [["Character", "≐̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nequiv; with a semi-colon" do + input = "≢" + output = [["Character", "≢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nesear; with a semi-colon" do + input = "⤨" + output = [["Character", "⤨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nesim; with a semi-colon" do + input = "≂̸" + output = [["Character", "≂̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nexist; with a semi-colon" do + input = "∄" + output = [["Character", "∄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nexists; with a semi-colon" do + input = "∄" + output = [["Character", "∄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nfr; with a semi-colon" do + input = "𝔫" + output = [["Character", "𝔫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ngE; with a semi-colon" do + input = "≧̸" + output = [["Character", "≧̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + 
+ assert result.tokens == output + end + + test "tokenize/1 Named entity: nge; with a semi-colon" do + input = "≱" + output = [["Character", "≱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ngeq; with a semi-colon" do + input = "≱" + output = [["Character", "≱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ngeqq; with a semi-colon" do + input = "≧̸" + output = [["Character", "≧̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ngeqslant; with a semi-colon" do + input = "⩾̸" + output = [["Character", "⩾̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nges; with a semi-colon" do + input = "⩾̸" + output = [["Character", "⩾̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ngsim; with a semi-colon" do + input = "≵" + output = [["Character", "≵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ngt; with a semi-colon" do + input = "≯" + output = [["Character", "≯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ngtr; with a semi-colon" do + input = "≯" + output = [["Character", "≯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nhArr; with a semi-colon" do + input = "⇎" + output = [["Character", "⇎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nharr; with a semi-colon" do + input = "↮" + output = [["Character", "↮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nhpar; with a semi-colon" do + input = "⫲" + output = [["Character", "⫲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ni; with a semi-colon" do + input = "∋" + output = [["Character", "∋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nis; with a semi-colon" do + input = "⋼" + output = [["Character", "⋼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nisd; with a semi-colon" do + input = "⋺" + output = [["Character", "⋺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: niv; with a semi-colon" do + input = "∋" + output = 
[["Character", "∋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: njcy; with a semi-colon" do + input = "њ" + output = [["Character", "њ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nlArr; with a semi-colon" do + input = "⇍" + output = [["Character", "⇍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nlE; with a semi-colon" do + input = "≦̸" + output = [["Character", "≦̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nlarr; with a semi-colon" do + input = "↚" + output = [["Character", "↚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nldr; with a semi-colon" do + input = "‥" + output = [["Character", "‥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nle; with a semi-colon" do + input = "≰" + output = [["Character", "≰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nleftarrow; with a semi-colon" do + input = "↚" + output = [["Character", "↚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nleftrightarrow; with a semi-colon" do + input = "↮" + output = [["Character", "↮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nleq; with a semi-colon" do + input = "≰" + output = [["Character", "≰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nleqq; with a semi-colon" do + input = "≦̸" + output = [["Character", "≦̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nleqslant; with a semi-colon" do + input = "⩽̸" + output = [["Character", "⩽̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nles; with a semi-colon" do + input = "⩽̸" + output = [["Character", "⩽̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nless; with a semi-colon" do + input = "≮" + output = [["Character", "≮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part36_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part36_test.exs new file mode 100644 index 
00000000..ae9d9206 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part36_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart36Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: nlsim; with a semi-colon" do + input = "≴" + output = [["Character", "≴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nlt; with a semi-colon" do + input = "≮" + output = [["Character", "≮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nltri; with a semi-colon" do + input = "⋪" + output = [["Character", "⋪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nltrie; with a semi-colon" do + input = "⋬" + output = [["Character", "⋬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nmid; with a semi-colon" do + input = "∤" + output = [["Character", "∤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nopf; with a semi-colon" do + input = "𝕟" + output = [["Character", "𝕟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: not without a semi-colon" do + input = "¬" + output = [["Character", "¬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: not; with a semi-colon" do + input = "¬" + output = [["Character", "¬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notin; with a semi-colon" do + input = "∉" + output = [["Character", "∉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notinE; with a semi-colon" do + input = "⋹̸" + output = [["Character", "⋹̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notindot; with a semi-colon" do + input = "⋵̸" + output = [["Character", "⋵̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notinva; with a semi-colon" do + input = "∉" + output = [["Character", "∉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notinvb; with a semi-colon" do + input = "⋷" + output = [["Character", "⋷"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notinvc; with a semi-colon" do + input = "⋶" + output = [["Character", "⋶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notni; with a semi-colon" do + input = "∌" + output = [["Character", "∌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notniva; with a semi-colon" do + input = "∌" + output = [["Character", "∌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notnivb; with a semi-colon" do + input = "⋾" + output = [["Character", "⋾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: notnivc; with a semi-colon" do + input = "⋽" + output = [["Character", "⋽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: npar; with a semi-colon" do + input = "∦" + output = [["Character", "∦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nparallel; with a semi-colon" do + input = "∦" + output = [["Character", "∦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nparsl; with a semi-colon" do + input = "⫽⃥" + output = [["Character", "⫽⃥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: npart; with a semi-colon" do + input = "∂̸" + output = [["Character", "∂̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: npolint; with a semi-colon" do + input = "⨔" + output = [["Character", "⨔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: npr; with a semi-colon" do + input = "⊀" + output = [["Character", "⊀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nprcue; with a semi-colon" do + input = "⋠" + output = [["Character", "⋠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: npre; with a semi-colon" do + input = "⪯̸" + output = [["Character", "⪯̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nprec; with a semi-colon" do + input = "⊀" + output = [["Character", "⊀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Named entity: npreceq; with a semi-colon" do + input = "⪯̸" + output = [["Character", "⪯̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nrArr; with a semi-colon" do + input = "⇏" + output = [["Character", "⇏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nrarr; with a semi-colon" do + input = "↛" + output = [["Character", "↛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nrarrc; with a semi-colon" do + input = "⤳̸" + output = [["Character", "⤳̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nrarrw; with a semi-colon" do + input = "↝̸" + output = [["Character", "↝̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nrightarrow; with a semi-colon" do + input = "↛" + output = [["Character", "↛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nrtri; with a semi-colon" do + input = "⋫" + output = [["Character", "⋫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nrtrie; with a semi-colon" do + input = "⋭" + output = [["Character", "⋭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsc; with a semi-colon" do + input = "⊁" + output = [["Character", "⊁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsccue; with a semi-colon" do + input = "⋡" + output = [["Character", "⋡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsce; with a semi-colon" do + input = "⪰̸" + output = [["Character", "⪰̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nscr; with a semi-colon" do + input = "𝓃" + output = [["Character", "𝓃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nshortmid; with a semi-colon" do + input = "∤" + output = [["Character", "∤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nshortparallel; with a semi-colon" do + input = "∦" + output = [["Character", "∦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsim; with a semi-colon" do + input = "≁" + output = [["Character", "≁"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsime; with a semi-colon" do + input = "≄" + output = [["Character", "≄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsimeq; with a semi-colon" do + input = "≄" + output = [["Character", "≄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsmid; with a semi-colon" do + input = "∤" + output = [["Character", "∤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nspar; with a semi-colon" do + input = "∦" + output = [["Character", "∦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsqsube; with a semi-colon" do + input = "⋢" + output = [["Character", "⋢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsqsupe; with a semi-colon" do + input = "⋣" + output = [["Character", "⋣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsub; with a semi-colon" do + input = "⊄" + output = [["Character", "⊄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsubE; with a semi-colon" do + input = "⫅̸" + output = [["Character", "⫅̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsube; with a semi-colon" do + input = "⊈" + output = [["Character", "⊈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsubset; with a semi-colon" do + input = "⊂⃒" + output = [["Character", "⊂⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsubseteq; with a semi-colon" do + input = "⊈" + output = [["Character", "⊈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsubseteqq; with a semi-colon" do + input = "⫅̸" + output = [["Character", "⫅̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsucc; with a semi-colon" do + input = "⊁" + output = [["Character", "⊁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsucceq; with a semi-colon" do + input = "⪰̸" + output = [["Character", "⪰̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Named entity: nsup; with a semi-colon" do + input = "⊅" + output = [["Character", "⊅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsupE; with a semi-colon" do + input = "⫆̸" + output = [["Character", "⫆̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsupe; with a semi-colon" do + input = "⊉" + output = [["Character", "⊉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsupset; with a semi-colon" do + input = "⊃⃒" + output = [["Character", "⊃⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsupseteq; with a semi-colon" do + input = "⊉" + output = [["Character", "⊉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nsupseteqq; with a semi-colon" do + input = "⫆̸" + output = [["Character", "⫆̸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntgl; with a semi-colon" do + input = "≹" + output = [["Character", "≹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntilde without a semi-colon" do + input = "ñ" + output = [["Character", "ñ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntilde; with a semi-colon" do + input = "ñ" + output = [["Character", "ñ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntlg; with a semi-colon" do + input = "≸" + output = [["Character", "≸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntriangleleft; with a semi-colon" do + input = "⋪" + output = [["Character", "⋪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntrianglelefteq; with a semi-colon" do + input = "⋬" + output = [["Character", "⋬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntriangleright; with a semi-colon" do + input = "⋫" + output = [["Character", "⋫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ntrianglerighteq; with a semi-colon" do + input = "⋭" + output = [["Character", "⋭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: nu; with a semi-colon" do + input = "ν" + output = [["Character", "ν"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: num; with a semi-colon" do + input = "#" + output = [["Character", "#"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: numero; with a semi-colon" do + input = "№" + output = [["Character", "№"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: numsp; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvDash; with a semi-colon" do + input = "⊭" + output = [["Character", "⊭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvHarr; with a semi-colon" do + input = "⤄" + output = [["Character", "⤄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvap; with a semi-colon" do + input = "≍⃒" + output = [["Character", "≍⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvdash; with a semi-colon" do + input = "⊬" + output = [["Character", "⊬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvge; with a semi-colon" do + input = "≥⃒" + output = [["Character", "≥⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvgt; with a semi-colon" do + input = ">⃒" + output = [["Character", ">⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvinfin; with a semi-colon" do + input = "⧞" + output = [["Character", "⧞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvlArr; with a semi-colon" do + input = "⤂" + output = [["Character", "⤂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvle; with a semi-colon" do + input = "≤⃒" + output = [["Character", "≤⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvlt; with a semi-colon" do + input = "<⃒" + output = [["Character", "<⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvltrie; with a semi-colon" do + input = "⊴⃒" + output = [["Character", "⊴⃒"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvrArr; with a semi-colon" do + input = "⤃" + output = [["Character", "⤃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvrtrie; with a semi-colon" do + input = "⊵⃒" + output = [["Character", "⊵⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nvsim; with a semi-colon" do + input = "∼⃒" + output = [["Character", "∼⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nwArr; with a semi-colon" do + input = "⇖" + output = [["Character", "⇖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nwarhk; with a semi-colon" do + input = "⤣" + output = [["Character", "⤣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nwarr; with a semi-colon" do + input = "↖" + output = [["Character", "↖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nwarrow; with a semi-colon" do + input = "↖" + output = [["Character", "↖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: nwnear; with a semi-colon" do + input = "⤧" + output = [["Character", "⤧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oS; with a semi-colon" do + input = "Ⓢ" + output = [["Character", "Ⓢ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oacute without a semi-colon" do + input = "ó" + output = [["Character", "ó"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oacute; with a semi-colon" do + input = "ó" + output = [["Character", "ó"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oast; with a semi-colon" do + input = "⊛" + output = [["Character", "⊛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ocir; with a semi-colon" do + input = "⊚" + output = [["Character", "⊚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ocirc without a semi-colon" do + input = "ô" + output = [["Character", "ô"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + 
test "tokenize/1 Named entity: ocirc; with a semi-colon" do + input = "ô" + output = [["Character", "ô"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part37_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part37_test.exs new file mode 100644 index 00000000..8e0daa22 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part37_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart37Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: ocy; with a semi-colon" do + input = "о" + output = [["Character", "о"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: odash; with a semi-colon" do + input = "⊝" + output = [["Character", "⊝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: odblac; with a semi-colon" do + input = "ő" + output = [["Character", "ő"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: odiv; with a semi-colon" do + input = "⨸" + output = [["Character", "⨸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: odot; with a semi-colon" do + input = "⊙" + output = [["Character", "⊙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: odsold; with a semi-colon" do + input = "⦼" + output = [["Character", "⦼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oelig; with a semi-colon" do + input = "œ" + output = [["Character", "œ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ofcir; with a semi-colon" do + input = "⦿" + output = [["Character", "⦿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ofr; with a semi-colon" do + input = "𝔬" + output = [["Character", "𝔬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ogon; with a semi-colon" do + input = "˛" + output = [["Character", "˛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ograve without a semi-colon" do + input = "ò" + output = [["Character", "ò"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: ograve; with a semi-colon" do + input = "ò" + output = [["Character", "ò"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ogt; with a semi-colon" do + input = "⧁" + output = [["Character", "⧁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ohbar; with a semi-colon" do + input = "⦵" + output = [["Character", "⦵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ohm; with a semi-colon" do + input = "Ω" + output = [["Character", "Ω"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oint; with a semi-colon" do + input = "∮" + output = [["Character", "∮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: olarr; with a semi-colon" do + input = "↺" + output = [["Character", "↺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: olcir; with a semi-colon" do + input = "⦾" + output = [["Character", "⦾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: olcross; with a semi-colon" do + input = "⦻" + output = [["Character", "⦻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oline; with a semi-colon" do + input = "‾" + output = [["Character", "‾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: olt; with a semi-colon" do + input = "⧀" + output = [["Character", "⧀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: omacr; with a semi-colon" do + input = "ō" + output = [["Character", "ō"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: omega; with a semi-colon" do + input = "ω" + output = [["Character", "ω"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: omicron; with a semi-colon" do + input = "ο" + output = [["Character", "ο"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: omid; with a semi-colon" do + input = "⦶" + output = [["Character", "⦶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ominus; with a semi-colon" do + input = "⊖" + output = [["Character", "⊖"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oopf; with a semi-colon" do + input = "𝕠" + output = [["Character", "𝕠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: opar; with a semi-colon" do + input = "⦷" + output = [["Character", "⦷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: operp; with a semi-colon" do + input = "⦹" + output = [["Character", "⦹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oplus; with a semi-colon" do + input = "⊕" + output = [["Character", "⊕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: or; with a semi-colon" do + input = "∨" + output = [["Character", "∨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: orarr; with a semi-colon" do + input = "↻" + output = [["Character", "↻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ord; with a semi-colon" do + input = "⩝" + output = [["Character", "⩝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: order; with a semi-colon" do + input = "ℴ" + output = [["Character", "ℴ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: orderof; with a semi-colon" do + input = "ℴ" + output = [["Character", "ℴ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ordf without a semi-colon" do + input = "ª" + output = [["Character", "ª"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ordf; with a semi-colon" do + input = "ª" + output = [["Character", "ª"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ordm without a semi-colon" do + input = "º" + output = [["Character", "º"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ordm; with a semi-colon" do + input = "º" + output = [["Character", "º"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: origof; with a semi-colon" do + input = "⊶" + output = [["Character", "⊶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Named entity: oror; with a semi-colon" do + input = "⩖" + output = [["Character", "⩖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: orslope; with a semi-colon" do + input = "⩗" + output = [["Character", "⩗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: orv; with a semi-colon" do + input = "⩛" + output = [["Character", "⩛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oscr; with a semi-colon" do + input = "ℴ" + output = [["Character", "ℴ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oslash without a semi-colon" do + input = "ø" + output = [["Character", "ø"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: oslash; with a semi-colon" do + input = "ø" + output = [["Character", "ø"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: osol; with a semi-colon" do + input = "⊘" + output = [["Character", "⊘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: otilde without a semi-colon" do + input = "õ" + output = [["Character", "õ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: otilde; with a semi-colon" do + input = "õ" + output = [["Character", "õ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: otimes; with a semi-colon" do + input = "⊗" + output = [["Character", "⊗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: otimesas; with a semi-colon" do + input = "⨶" + output = [["Character", "⨶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ouml without a semi-colon" do + input = "ö" + output = [["Character", "ö"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ouml; with a semi-colon" do + input = "ö" + output = [["Character", "ö"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ovbar; with a semi-colon" do + input = "⌽" + output = [["Character", "⌽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: par; with a semi-colon" do + input = "∥" + output = [["Character", "∥"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: para without a semi-colon" do + input = "¶" + output = [["Character", "¶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: para; with a semi-colon" do + input = "¶" + output = [["Character", "¶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: parallel; with a semi-colon" do + input = "∥" + output = [["Character", "∥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: parsim; with a semi-colon" do + input = "⫳" + output = [["Character", "⫳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: parsl; with a semi-colon" do + input = "⫽" + output = [["Character", "⫽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: part; with a semi-colon" do + input = "∂" + output = [["Character", "∂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pcy; with a semi-colon" do + input = "п" + output = [["Character", "п"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: percnt; with a semi-colon" do + input = "%" + output = [["Character", "%"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: period; with a semi-colon" do + input = "." 
+ output = [["Character", "."]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: permil; with a semi-colon" do + input = "‰" + output = [["Character", "‰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: perp; with a semi-colon" do + input = "⊥" + output = [["Character", "⊥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pertenk; with a semi-colon" do + input = "‱" + output = [["Character", "‱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pfr; with a semi-colon" do + input = "𝔭" + output = [["Character", "𝔭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: phi; with a semi-colon" do + input = "φ" + output = [["Character", "φ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: phiv; with a semi-colon" do + input = "ϕ" + output = [["Character", "ϕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: phmmat; with a semi-colon" do + input = "ℳ" + output = [["Character", "ℳ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: phone; with a semi-colon" do + input = "☎" + output = [["Character", "☎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pi; with a semi-colon" do + input = "π" + output = [["Character", "π"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pitchfork; with a semi-colon" do + input = "⋔" + output = [["Character", "⋔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: piv; with a semi-colon" do + input = "ϖ" + output = [["Character", "ϖ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: planck; with a semi-colon" do + input = "ℏ" + output = [["Character", "ℏ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: planckh; with a semi-colon" do + input = "ℎ" + output = [["Character", "ℎ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plankv; with a semi-colon" do + input = "ℏ" + output = [["Character", "ℏ"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plus; with a semi-colon" do + input = "+" + output = [["Character", "+"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plusacir; with a semi-colon" do + input = "⨣" + output = [["Character", "⨣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plusb; with a semi-colon" do + input = "⊞" + output = [["Character", "⊞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pluscir; with a semi-colon" do + input = "⨢" + output = [["Character", "⨢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plusdo; with a semi-colon" do + input = "∔" + output = [["Character", "∔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plusdu; with a semi-colon" do + input = "⨥" + output = [["Character", "⨥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pluse; with a semi-colon" do + input = "⩲" + output = [["Character", "⩲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plusmn without a semi-colon" do + input = "±" + output = [["Character", "±"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plusmn; with a semi-colon" do + input = "±" + output = [["Character", "±"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plussim; with a semi-colon" do + input = "⨦" + output = [["Character", "⨦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: plustwo; with a semi-colon" do + input = "⨧" + output = [["Character", "⨧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pm; with a semi-colon" do + input = "±" + output = [["Character", "±"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pointint; with a semi-colon" do + input = "⨕" + output = [["Character", "⨕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: popf; with a semi-colon" do + input = "𝕡" + output = [["Character", "𝕡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: pound without a semi-colon" do + input = "£" + output = [["Character", "£"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pound; with a semi-colon" do + input = "£" + output = [["Character", "£"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pr; with a semi-colon" do + input = "≺" + output = [["Character", "≺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prE; with a semi-colon" do + input = "⪳" + output = [["Character", "⪳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prap; with a semi-colon" do + input = "⪷" + output = [["Character", "⪷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prcue; with a semi-colon" do + input = "≼" + output = [["Character", "≼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pre; with a semi-colon" do + input = "⪯" + output = [["Character", "⪯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prec; with a semi-colon" do + input = "≺" + output = [["Character", "≺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part38_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part38_test.exs new file mode 100644 index 00000000..15883279 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part38_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart38Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: precapprox; with a semi-colon" do + input = "⪷" + output = [["Character", "⪷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: preccurlyeq; with a semi-colon" do + input = "≼" + output = [["Character", "≼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: preceq; with a semi-colon" do + input = "⪯" + output = [["Character", "⪯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: precnapprox; with a semi-colon" do + input = "⪹" + output = [["Character", "⪹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: precneqq; with a semi-colon" do + input = "⪵" + output = [["Character", "⪵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: precnsim; with a semi-colon" do + input = "⋨" + output = [["Character", "⋨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: precsim; with a semi-colon" do + input = "≾" + output = [["Character", "≾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prime; with a semi-colon" do + input = "′" + output = [["Character", "′"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: primes; with a semi-colon" do + input = "ℙ" + output = [["Character", "ℙ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prnE; with a semi-colon" do + input = "⪵" + output = [["Character", "⪵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prnap; with a semi-colon" do + input = "⪹" + output = [["Character", "⪹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prnsim; with a semi-colon" do + input = "⋨" + output = [["Character", "⋨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prod; with a semi-colon" do + input = "∏" + output = [["Character", "∏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: profalar; with a semi-colon" do + input = "⌮" + output = [["Character", "⌮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end 
+ + test "tokenize/1 Named entity: profline; with a semi-colon" do + input = "⌒" + output = [["Character", "⌒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: profsurf; with a semi-colon" do + input = "⌓" + output = [["Character", "⌓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prop; with a semi-colon" do + input = "∝" + output = [["Character", "∝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: propto; with a semi-colon" do + input = "∝" + output = [["Character", "∝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prsim; with a semi-colon" do + input = "≾" + output = [["Character", "≾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: prurel; with a semi-colon" do + input = "⊰" + output = [["Character", "⊰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: pscr; with a semi-colon" do + input = "𝓅" + output = [["Character", "𝓅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: psi; with a semi-colon" do + input = "ψ" + output = [["Character", "ψ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: puncsp; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: qfr; with a semi-colon" do + input = "𝔮" + output = [["Character", "𝔮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: qint; with a semi-colon" do + input = "⨌" + output = [["Character", "⨌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: qopf; with a semi-colon" do + input = "𝕢" + output = [["Character", "𝕢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: qprime; with a semi-colon" do + input = "⁗" + output = [["Character", "⁗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: qscr; with a semi-colon" do + input = "𝓆" + output = [["Character", "𝓆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: quaternions; with a semi-colon" do + input = "ℍ" + output = [["Character", "ℍ"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: quatint; with a semi-colon" do + input = "&quatint;" + output = [["Character", "⨖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: quest; with a semi-colon" do + input = "&quest;" + output = [["Character", "?"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: questeq; with a semi-colon" do + input = "&questeq;" + output = [["Character", "≟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: quot without a semi-colon" do + input = "&quot" + output = [["Character", "\""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: quot; with a semi-colon" do + input = "&quot;" + output = [["Character", "\""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rAarr; with a semi-colon" do + input = "&rAarr;" + output = [["Character", "⇛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rArr; with a semi-colon" do + input = "&rArr;" + output = [["Character", "⇒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rAtail; with a semi-colon" do + input = "&rAtail;" + output = [["Character", "⤜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rBarr; with a semi-colon" do + input = "&rBarr;" + output = [["Character", "⤏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rHar; with a semi-colon" do + input = "&rHar;" + output = [["Character", "⥤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: race; with a semi-colon" do + input = "&race;" + output = [["Character", "∽̱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: racute; with a semi-colon" do + input = "&racute;" + output = [["Character", "ŕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: radic; with a semi-colon" do + input = "&radic;" + output = [["Character", "√"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: raemptyv; with a semi-colon" do + input = "&raemptyv;" + output = [["Character", "⦳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens
== output + end + + test "tokenize/1 Named entity: rang; with a semi-colon" do + input = "⟩" + output = [["Character", "⟩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rangd; with a semi-colon" do + input = "⦒" + output = [["Character", "⦒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: range; with a semi-colon" do + input = "⦥" + output = [["Character", "⦥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rangle; with a semi-colon" do + input = "⟩" + output = [["Character", "⟩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: raquo without a semi-colon" do + input = "»" + output = [["Character", "»"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: raquo; with a semi-colon" do + input = "»" + output = [["Character", "»"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarr; with a semi-colon" do + input = "→" + output = [["Character", "→"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrap; with a semi-colon" do + input = "⥵" + output = [["Character", "⥵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrb; with a semi-colon" do + input = "⇥" + output = [["Character", "⇥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrbfs; with a semi-colon" do + input = "⤠" + output = [["Character", "⤠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrc; with a semi-colon" do + input = "⤳" + output = [["Character", "⤳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrfs; with a semi-colon" do + input = "⤞" + output = [["Character", "⤞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrhk; with a semi-colon" do + input = "↪" + output = [["Character", "↪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrlp; with a semi-colon" do + input = "↬" + output = [["Character", "↬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrpl; with a semi-colon" do + input = "⥅" + output = 
[["Character", "⥅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrsim; with a semi-colon" do + input = "⥴" + output = [["Character", "⥴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrtl; with a semi-colon" do + input = "↣" + output = [["Character", "↣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rarrw; with a semi-colon" do + input = "↝" + output = [["Character", "↝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ratail; with a semi-colon" do + input = "⤚" + output = [["Character", "⤚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ratio; with a semi-colon" do + input = "∶" + output = [["Character", "∶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rationals; with a semi-colon" do + input = "ℚ" + output = [["Character", "ℚ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rbarr; with a semi-colon" do + input = "⤍" + output = [["Character", "⤍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rbbrk; with a semi-colon" do + input = "❳" + output = [["Character", "❳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rbrace; with a semi-colon" do + input = "}" + output = [["Character", "}"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rbrack; with a semi-colon" do + input = "]" + output = [["Character", "]"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rbrke; with a semi-colon" do + input = "⦌" + output = [["Character", "⦌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rbrksld; with a semi-colon" do + input = "⦎" + output = [["Character", "⦎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rbrkslu; with a semi-colon" do + input = "⦐" + output = [["Character", "⦐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rcaron; with a semi-colon" do + input = "ř" + output = [["Character", "ř"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rcedil; with a semi-colon" do + input = "ŗ" + output = [["Character", "ŗ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rceil; with a semi-colon" do + input = "⌉" + output = [["Character", "⌉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rcub; with a semi-colon" do + input = "}" + output = [["Character", "}"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rcy; with a semi-colon" do + input = "р" + output = [["Character", "р"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rdca; with a semi-colon" do + input = "⤷" + output = [["Character", "⤷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rdldhar; with a semi-colon" do + input = "⥩" + output = [["Character", "⥩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rdquo; with a semi-colon" do + input = "”" + output = [["Character", "”"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rdquor; with a semi-colon" do + input = "”" + output = [["Character", "”"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rdsh; with a semi-colon" do + input = "↳" + output = [["Character", "↳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: real; with a semi-colon" do + input = "ℜ" + output = [["Character", "ℜ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: realine; with a semi-colon" do + input = "ℛ" + output = [["Character", "ℛ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: realpart; with a semi-colon" do + input = "ℜ" + output = [["Character", "ℜ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: reals; with a semi-colon" do + input = "ℝ" + output = [["Character", "ℝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rect; with a semi-colon" do + input = "▭" + output = [["Character", "▭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: reg 
without a semi-colon" do + input = "®" + output = [["Character", "®"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: reg; with a semi-colon" do + input = "®" + output = [["Character", "®"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rfisht; with a semi-colon" do + input = "⥽" + output = [["Character", "⥽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rfloor; with a semi-colon" do + input = "⌋" + output = [["Character", "⌋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rfr; with a semi-colon" do + input = "𝔯" + output = [["Character", "𝔯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rhard; with a semi-colon" do + input = "⇁" + output = [["Character", "⇁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rharu; with a semi-colon" do + input = "⇀" + output = [["Character", "⇀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rharul; with a semi-colon" do + input = "⥬" + output = [["Character", "⥬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rho; with a semi-colon" do + input = "ρ" + output = [["Character", "ρ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rhov; with a semi-colon" do + input = "ϱ" + output = [["Character", "ϱ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightarrow; with a semi-colon" do + input = "→" + output = [["Character", "→"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightarrowtail; with a semi-colon" do + input = "↣" + output = [["Character", "↣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightharpoondown; with a semi-colon" do + input = "⇁" + output = [["Character", "⇁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightharpoonup; with a semi-colon" do + input = "⇀" + output = [["Character", "⇀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part39_test.exs 
b/test/floki/html/generated/tokenizer/namedEntities_part39_test.exs new file mode 100644 index 00000000..b9ccdcd7 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part39_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart39Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: rightleftarrows; with a semi-colon" do + input = "⇄" + output = [["Character", "⇄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightleftharpoons; with a semi-colon" do + input = "⇌" + output = [["Character", "⇌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightrightarrows; with a semi-colon" do + input = "⇉" + output = [["Character", "⇉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightsquigarrow; with a semi-colon" do + input = "↝" + output = [["Character", "↝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rightthreetimes; with a semi-colon" do + input = "⋌" + output = [["Character", "⋌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ring; with a semi-colon" do + input = "˚" + output = [["Character", "˚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: risingdotseq; with a semi-colon" do + input = "≓" + output = [["Character", "≓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rlarr; with a semi-colon" do + input = "⇄" + output = [["Character", "⇄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rlhar; with a semi-colon" do + input = "⇌" + output = [["Character", "⇌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rlm; with a semi-colon" do + input = "‏" + output = [["Character", "‏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rmoust; with a semi-colon" do + input = "⎱" + output = [["Character", "⎱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rmoustache; with a semi-colon" do + input = "⎱" + output = [["Character", "⎱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rnmid; with 
a semi-colon" do + input = "⫮" + output = [["Character", "⫮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: roang; with a semi-colon" do + input = "⟭" + output = [["Character", "⟭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: roarr; with a semi-colon" do + input = "⇾" + output = [["Character", "⇾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: robrk; with a semi-colon" do + input = "⟧" + output = [["Character", "⟧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ropar; with a semi-colon" do + input = "⦆" + output = [["Character", "⦆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ropf; with a semi-colon" do + input = "𝕣" + output = [["Character", "𝕣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: roplus; with a semi-colon" do + input = "⨮" + output = [["Character", "⨮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rotimes; with a semi-colon" do + input = "⨵" + output = [["Character", "⨵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rpar; with a semi-colon" do + input = ")" + output = [["Character", ")"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rpargt; with a semi-colon" do + input = "⦔" + output = [["Character", "⦔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rppolint; with a semi-colon" do + input = "⨒" + output = [["Character", "⨒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rrarr; with a semi-colon" do + input = "⇉" + output = [["Character", "⇉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rsaquo; with a semi-colon" do + input = "›" + output = [["Character", "›"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rscr; with a semi-colon" do + input = "𝓇" + output = [["Character", "𝓇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rsh; with a semi-colon" do + input = "↱" + output = [["Character", "↱"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rsqb; with a semi-colon" do + input = "]" + output = [["Character", "]"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rsquo; with a semi-colon" do + input = "’" + output = [["Character", "’"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rsquor; with a semi-colon" do + input = "’" + output = [["Character", "’"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rthree; with a semi-colon" do + input = "⋌" + output = [["Character", "⋌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rtimes; with a semi-colon" do + input = "⋊" + output = [["Character", "⋊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rtri; with a semi-colon" do + input = "▹" + output = [["Character", "▹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rtrie; with a semi-colon" do + input = "⊵" + output = [["Character", "⊵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rtrif; with a semi-colon" do + input = "▸" + output = [["Character", "▸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rtriltri; with a semi-colon" do + input = "⧎" + output = [["Character", "⧎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ruluhar; with a semi-colon" do + input = "⥨" + output = [["Character", "⥨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: rx; with a semi-colon" do + input = "℞" + output = [["Character", "℞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sacute; with a semi-colon" do + input = "ś" + output = [["Character", "ś"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sbquo; with a semi-colon" do + input = "‚" + output = [["Character", "‚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sc; with a semi-colon" do + input = "≻" + output = [["Character", "≻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scE; 
with a semi-colon" do + input = "⪴" + output = [["Character", "⪴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scap; with a semi-colon" do + input = "⪸" + output = [["Character", "⪸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scaron; with a semi-colon" do + input = "š" + output = [["Character", "š"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sccue; with a semi-colon" do + input = "≽" + output = [["Character", "≽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sce; with a semi-colon" do + input = "⪰" + output = [["Character", "⪰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scedil; with a semi-colon" do + input = "ş" + output = [["Character", "ş"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scirc; with a semi-colon" do + input = "ŝ" + output = [["Character", "ŝ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scnE; with a semi-colon" do + input = "⪶" + output = [["Character", "⪶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scnap; with a semi-colon" do + input = "⪺" + output = [["Character", "⪺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scnsim; with a semi-colon" do + input = "⋩" + output = [["Character", "⋩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scpolint; with a semi-colon" do + input = "⨓" + output = [["Character", "⨓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scsim; with a semi-colon" do + input = "≿" + output = [["Character", "≿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: scy; with a semi-colon" do + input = "с" + output = [["Character", "с"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sdot; with a semi-colon" do + input = "⋅" + output = [["Character", "⋅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sdotb; with a semi-colon" do + input = "⊡" + output = [["Character", "⊡"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sdote; with a semi-colon" do + input = "⩦" + output = [["Character", "⩦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: seArr; with a semi-colon" do + input = "⇘" + output = [["Character", "⇘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: searhk; with a semi-colon" do + input = "⤥" + output = [["Character", "⤥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: searr; with a semi-colon" do + input = "↘" + output = [["Character", "↘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: searrow; with a semi-colon" do + input = "↘" + output = [["Character", "↘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sect without a semi-colon" do + input = "§" + output = [["Character", "§"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sect; with a semi-colon" do + input = "§" + output = [["Character", "§"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: semi; with a semi-colon" do + input = ";" + output = [["Character", ";"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: seswar; with a semi-colon" do + input = "⤩" + output = [["Character", "⤩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: setminus; with a semi-colon" do + input = "∖" + output = [["Character", "∖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: setmn; with a semi-colon" do + input = "∖" + output = [["Character", "∖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sext; with a semi-colon" do + input = "✶" + output = [["Character", "✶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sfr; with a semi-colon" do + input = "𝔰" + output = [["Character", "𝔰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sfrown; with a semi-colon" do + input = "⌢" + output = [["Character", "⌢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: 
sharp; with a semi-colon" do + input = "♯" + output = [["Character", "♯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: shchcy; with a semi-colon" do + input = "щ" + output = [["Character", "щ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: shcy; with a semi-colon" do + input = "ш" + output = [["Character", "ш"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: shortmid; with a semi-colon" do + input = "∣" + output = [["Character", "∣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: shortparallel; with a semi-colon" do + input = "∥" + output = [["Character", "∥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: shy without a semi-colon" do + input = "­" + output = [["Character", "­"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: shy; with a semi-colon" do + input = "­" + output = [["Character", "­"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sigma; with a semi-colon" do + input = "σ" + output = [["Character", "σ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sigmaf; with a semi-colon" do + input = "ς" + output = [["Character", "ς"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sigmav; with a semi-colon" do + input = "ς" + output = [["Character", "ς"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sim; with a semi-colon" do + input = "∼" + output = [["Character", "∼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simdot; with a semi-colon" do + input = "⩪" + output = [["Character", "⩪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sime; with a semi-colon" do + input = "≃" + output = [["Character", "≃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simeq; with a semi-colon" do + input = "≃" + output = [["Character", "≃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simg; with a semi-colon" do + input = "⪞" + output = [["Character", "⪞"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simgE; with a semi-colon" do + input = "⪠" + output = [["Character", "⪠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: siml; with a semi-colon" do + input = "⪝" + output = [["Character", "⪝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simlE; with a semi-colon" do + input = "⪟" + output = [["Character", "⪟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simne; with a semi-colon" do + input = "≆" + output = [["Character", "≆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simplus; with a semi-colon" do + input = "⨤" + output = [["Character", "⨤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: simrarr; with a semi-colon" do + input = "⥲" + output = [["Character", "⥲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: slarr; with a semi-colon" do + input = "←" + output = [["Character", "←"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: smallsetminus; with a semi-colon" do + input = "∖" + output = [["Character", "∖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: smashp; with a semi-colon" do + input = "⨳" + output = [["Character", "⨳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: smeparsl; with a semi-colon" do + input = "⧤" + output = [["Character", "⧤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: smid; with a semi-colon" do + input = "∣" + output = [["Character", "∣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: smile; with a semi-colon" do + input = "⌣" + output = [["Character", "⌣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: smt; with a semi-colon" do + input = "⪪" + output = [["Character", "⪪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: smte; with a semi-colon" do + input = "⪬" + output = [["Character", "⪬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + 
test "tokenize/1 Named entity: smtes; with a semi-colon" do + input = "⪬︀" + output = [["Character", "⪬︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part3_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part3_test.exs new file mode 100644 index 00000000..5cca7050 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part3_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart3Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: Kcedil without a semi-colon" do + input = "&Kcedil" + output = [["Character", "&Kcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Kcy without a semi-colon" do + input = "&Kcy" + output = [["Character", "&Kcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Kfr without a semi-colon" do + input = "&Kfr" + output = [["Character", "&Kfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Kopf without a semi-colon" do + input = "&Kopf" + output = [["Character", "&Kopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Kscr without a semi-colon" do + input = "&Kscr" + output = [["Character", "&Kscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LJcy without a semi-colon" do + input = "&LJcy" + output = [["Character", "&LJcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lacute without a semi-colon" do + input = "&Lacute" + output = [["Character", "&Lacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lambda without a semi-colon" do + input = "&Lambda" + output = [["Character", "&Lambda"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lang without a semi-colon" do + input = "&Lang" + output = [["Character", "&Lang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Laplacetrf without a semi-colon" do + input = "&Laplacetrf" + output = [["Character", "&Laplacetrf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Larr without a semi-colon" do + input = "&Larr" + output = [["Character", 
"&Larr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lcaron without a semi-colon" do + input = "&Lcaron" + output = [["Character", "&Lcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lcedil without a semi-colon" do + input = "&Lcedil" + output = [["Character", "&Lcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lcy without a semi-colon" do + input = "&Lcy" + output = [["Character", "&Lcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftAngleBracket without a semi-colon" do + input = "&LeftAngleBracket" + output = [["Character", "&LeftAngleBracket"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftArrow without a semi-colon" do + input = "&LeftArrow" + output = [["Character", "&LeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftArrowBar without a semi-colon" do + input = "&LeftArrowBar" + output = [["Character", "&LeftArrowBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftArrowRightArrow without a semi-colon" do + input = "&LeftArrowRightArrow" + output = [["Character", "&LeftArrowRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftCeiling without a semi-colon" do + input = "&LeftCeiling" + output = [["Character", "&LeftCeiling"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftDoubleBracket without a semi-colon" do + input = "&LeftDoubleBracket" + output = [["Character", "&LeftDoubleBracket"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftDownTeeVector without a semi-colon" do + input = "&LeftDownTeeVector" + output = [["Character", "&LeftDownTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftDownVector without a semi-colon" do + input = "&LeftDownVector" + output = [["Character", "&LeftDownVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftDownVectorBar without a semi-colon" do + input = "&LeftDownVectorBar" + output = [["Character", "&LeftDownVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Bad named entity: LeftFloor without a semi-colon" do + input = "&LeftFloor" + output = [["Character", "&LeftFloor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftRightArrow without a semi-colon" do + input = "&LeftRightArrow" + output = [["Character", "&LeftRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftRightVector without a semi-colon" do + input = "&LeftRightVector" + output = [["Character", "&LeftRightVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftTee without a semi-colon" do + input = "&LeftTee" + output = [["Character", "&LeftTee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftTeeArrow without a semi-colon" do + input = "&LeftTeeArrow" + output = [["Character", "&LeftTeeArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftTeeVector without a semi-colon" do + input = "&LeftTeeVector" + output = [["Character", "&LeftTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftTriangle without a semi-colon" do + input = "&LeftTriangle" + output = [["Character", "&LeftTriangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftTriangleBar without a semi-colon" do + input = "&LeftTriangleBar" + output = [["Character", "&LeftTriangleBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftTriangleEqual without a semi-colon" do + input = "&LeftTriangleEqual" + output = [["Character", "&LeftTriangleEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftUpDownVector without a semi-colon" do + input = "&LeftUpDownVector" + output = [["Character", "&LeftUpDownVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftUpTeeVector without a semi-colon" do + input = "&LeftUpTeeVector" + output = [["Character", "&LeftUpTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftUpVector without a semi-colon" do + input = "&LeftUpVector" + output = [["Character", "&LeftUpVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftUpVectorBar without a semi-colon" do + input = "&LeftUpVectorBar" + output = [["Character", 
"&LeftUpVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftVector without a semi-colon" do + input = "&LeftVector" + output = [["Character", "&LeftVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LeftVectorBar without a semi-colon" do + input = "&LeftVectorBar" + output = [["Character", "&LeftVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Leftarrow without a semi-colon" do + input = "&Leftarrow" + output = [["Character", "&Leftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Leftrightarrow without a semi-colon" do + input = "&Leftrightarrow" + output = [["Character", "&Leftrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LessEqualGreater without a semi-colon" do + input = "&LessEqualGreater" + output = [["Character", "&LessEqualGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LessFullEqual without a semi-colon" do + input = "&LessFullEqual" + output = [["Character", "&LessFullEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LessGreater without a semi-colon" do + input = "&LessGreater" + output = [["Character", "&LessGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LessLess without a semi-colon" do + input = "&LessLess" + output = [["Character", "&LessLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LessSlantEqual without a semi-colon" do + input = "&LessSlantEqual" + output = [["Character", "&LessSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LessTilde without a semi-colon" do + input = "&LessTilde" + output = [["Character", "&LessTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lfr without a semi-colon" do + input = "&Lfr" + output = [["Character", "&Lfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ll without a semi-colon" do + input = "&Ll" + output = [["Character", "&Ll"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lleftarrow without a semi-colon" do + 
input = "&Lleftarrow" + output = [["Character", "&Lleftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lmidot without a semi-colon" do + input = "&Lmidot" + output = [["Character", "&Lmidot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LongLeftArrow without a semi-colon" do + input = "&LongLeftArrow" + output = [["Character", "&LongLeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LongLeftRightArrow without a semi-colon" do + input = "&LongLeftRightArrow" + output = [["Character", "&LongLeftRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LongRightArrow without a semi-colon" do + input = "&LongRightArrow" + output = [["Character", "&LongRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Longleftarrow without a semi-colon" do + input = "&Longleftarrow" + output = [["Character", "&Longleftarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Longleftrightarrow without a semi-colon" do + input = "&Longleftrightarrow" + output = [["Character", "&Longleftrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Longrightarrow without a semi-colon" do + input = "&Longrightarrow" + output = [["Character", "&Longrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lopf without a semi-colon" do + input = "&Lopf" + output = [["Character", "&Lopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LowerLeftArrow without a semi-colon" do + input = "&LowerLeftArrow" + output = [["Character", "&LowerLeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: LowerRightArrow without a semi-colon" do + input = "&LowerRightArrow" + output = [["Character", "&LowerRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lscr without a semi-colon" do + input = "&Lscr" + output = [["Character", "&Lscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lsh without a semi-colon" do + input = "&Lsh" + output = [["Character", "&Lsh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + 
end + + test "tokenize/1 Bad named entity: Lstrok without a semi-colon" do + input = "&Lstrok" + output = [["Character", "&Lstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Lt without a semi-colon" do + input = "&Lt" + output = [["Character", "&Lt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Map without a semi-colon" do + input = "&Map" + output = [["Character", "&Map"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Mcy without a semi-colon" do + input = "&Mcy" + output = [["Character", "&Mcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: MediumSpace without a semi-colon" do + input = "&MediumSpace" + output = [["Character", "&MediumSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Mellintrf without a semi-colon" do + input = "&Mellintrf" + output = [["Character", "&Mellintrf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Mfr without a semi-colon" do + input = "&Mfr" + output = [["Character", "&Mfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: MinusPlus without a semi-colon" do + input = "&MinusPlus" + output = [["Character", "&MinusPlus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Mopf without a semi-colon" do + input = "&Mopf" + output = [["Character", "&Mopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Mscr without a semi-colon" do + input = "&Mscr" + output = [["Character", "&Mscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Mu without a semi-colon" do + input = "&Mu" + output = [["Character", "&Mu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NJcy without a semi-colon" do + input = "&NJcy" + output = [["Character", "&NJcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Nacute without a semi-colon" do + input = "&Nacute" + output = [["Character", "&Nacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ncaron without a semi-colon" do + input = "&Ncaron" + output = [["Character", "&Ncaron"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ncedil without a semi-colon" do + input = "&Ncedil" + output = [["Character", "&Ncedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ncy without a semi-colon" do + input = "&Ncy" + output = [["Character", "&Ncy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NegativeMediumSpace without a semi-colon" do + input = "&NegativeMediumSpace" + output = [["Character", "&NegativeMediumSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NegativeThickSpace without a semi-colon" do + input = "&NegativeThickSpace" + output = [["Character", "&NegativeThickSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NegativeThinSpace without a semi-colon" do + input = "&NegativeThinSpace" + output = [["Character", "&NegativeThinSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NegativeVeryThinSpace without a semi-colon" do + input = "&NegativeVeryThinSpace" + output = [["Character", "&NegativeVeryThinSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NestedGreaterGreater without a semi-colon" do + input = "&NestedGreaterGreater" + output = [["Character", "&NestedGreaterGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NestedLessLess without a semi-colon" do + input = "&NestedLessLess" + output = [["Character", "&NestedLessLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NewLine without a semi-colon" do + input = "&NewLine" + output = [["Character", "&NewLine"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Nfr without a semi-colon" do + input = "&Nfr" + output = [["Character", "&Nfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NoBreak without a semi-colon" do + input = "&NoBreak" + output = [["Character", "&NoBreak"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NonBreakingSpace without a semi-colon" do + input = "&NonBreakingSpace" + output = [["Character", "&NonBreakingSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: 
Nopf without a semi-colon" do + input = "&Nopf" + output = [["Character", "&Nopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Not without a semi-colon" do + input = "&Not" + output = [["Character", "&Not"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotCongruent without a semi-colon" do + input = "&NotCongruent" + output = [["Character", "&NotCongruent"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotCupCap without a semi-colon" do + input = "&NotCupCap" + output = [["Character", "&NotCupCap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotDoubleVerticalBar without a semi-colon" do + input = "&NotDoubleVerticalBar" + output = [["Character", "&NotDoubleVerticalBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotElement without a semi-colon" do + input = "&NotElement" + output = [["Character", "&NotElement"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotEqual without a semi-colon" do + input = "&NotEqual" + output = [["Character", "&NotEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotEqualTilde without a semi-colon" do + input = "&NotEqualTilde" + output = [["Character", "&NotEqualTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotExists without a semi-colon" do + input = "&NotExists" + output = [["Character", "&NotExists"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotGreater without a semi-colon" do + input = "&NotGreater" + output = [["Character", "&NotGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotGreaterEqual without a semi-colon" do + input = "&NotGreaterEqual" + output = [["Character", "&NotGreaterEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotGreaterFullEqual without a semi-colon" do + input = "&NotGreaterFullEqual" + output = [["Character", "&NotGreaterFullEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotGreaterGreater without a semi-colon" do + input = "&NotGreaterGreater" + output = [["Character", "&NotGreaterGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part40_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part40_test.exs new file mode 100644 index 00000000..2f5c4893 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part40_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart40Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: softcy; with a semi-colon" do + input = "ь" + output = [["Character", "ь"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sol; with a semi-colon" do + input = "/" + output = [["Character", "/"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: solb; with a semi-colon" do + input = "⧄" + output = [["Character", "⧄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: solbar; with a semi-colon" do + input = "⌿" + output = [["Character", "⌿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sopf; with a semi-colon" do + input = "𝕤" + output = [["Character", "𝕤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: spades; with a semi-colon" do + input = "♠" + output = [["Character", "♠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: spadesuit; with a semi-colon" do + input = "♠" + output = [["Character", "♠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: spar; with a semi-colon" do + input = "∥" + output = [["Character", "∥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqcap; with a semi-colon" do + input = "⊓" + output = [["Character", "⊓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqcaps; with a semi-colon" do + input = "⊓︀" + output = [["Character", "⊓︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqcup; with a semi-colon" do + input = "⊔" + output = [["Character", "⊔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqcups; with a semi-colon" do + input = "⊔︀" + output = [["Character", "⊔︀"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsub; with a semi-colon" do + input = "⊏" + output = [["Character", "⊏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsube; with a semi-colon" do + input = "⊑" + output = [["Character", "⊑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsubset; with a semi-colon" do + input = "⊏" + output = [["Character", "⊏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsubseteq; with a semi-colon" do + input = "⊑" + output = [["Character", "⊑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsup; with a semi-colon" do + input = "⊐" + output = [["Character", "⊐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsupe; with a semi-colon" do + input = "⊒" + output = [["Character", "⊒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsupset; with a semi-colon" do + input = "⊐" + output = [["Character", "⊐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sqsupseteq; with a semi-colon" do + input = "⊒" + output = [["Character", "⊒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: squ; with a semi-colon" do + input = "□" + output = [["Character", "□"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: square; with a semi-colon" do + input = "□" + output = [["Character", "□"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: squarf; with a semi-colon" do + input = "▪" + output = [["Character", "▪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: squf; with a semi-colon" do + input = "▪" + output = [["Character", "▪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: srarr; with a semi-colon" do + input = "→" + output = [["Character", "→"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sscr; with a semi-colon" do + input = "𝓈" + output = [["Character", "𝓈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Named entity: ssetmn; with a semi-colon" do + input = "∖" + output = [["Character", "∖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ssmile; with a semi-colon" do + input = "⌣" + output = [["Character", "⌣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sstarf; with a semi-colon" do + input = "⋆" + output = [["Character", "⋆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: star; with a semi-colon" do + input = "☆" + output = [["Character", "☆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: starf; with a semi-colon" do + input = "★" + output = [["Character", "★"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: straightepsilon; with a semi-colon" do + input = "ϵ" + output = [["Character", "ϵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: straightphi; with a semi-colon" do + input = "ϕ" + output = [["Character", "ϕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: strns; with a semi-colon" do + input = "¯" + output = [["Character", "¯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sub; with a semi-colon" do + input = "⊂" + output = [["Character", "⊂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subE; with a semi-colon" do + input = "⫅" + output = [["Character", "⫅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subdot; with a semi-colon" do + input = "⪽" + output = [["Character", "⪽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sube; with a semi-colon" do + input = "⊆" + output = [["Character", "⊆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subedot; with a semi-colon" do + input = "⫃" + output = [["Character", "⫃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: submult; with a semi-colon" do + input = "⫁" + output = [["Character", "⫁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subnE; with a semi-colon" do + input = "⫋" + output = [["Character", "⫋"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subne; with a semi-colon" do + input = "⊊" + output = [["Character", "⊊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subplus; with a semi-colon" do + input = "⪿" + output = [["Character", "⪿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subrarr; with a semi-colon" do + input = "⥹" + output = [["Character", "⥹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subset; with a semi-colon" do + input = "⊂" + output = [["Character", "⊂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subseteq; with a semi-colon" do + input = "⊆" + output = [["Character", "⊆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subseteqq; with a semi-colon" do + input = "⫅" + output = [["Character", "⫅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subsetneq; with a semi-colon" do + input = "⊊" + output = [["Character", "⊊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subsetneqq; with a semi-colon" do + input = "⫋" + output = [["Character", "⫋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subsim; with a semi-colon" do + input = "⫇" + output = [["Character", "⫇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subsub; with a semi-colon" do + input = "⫕" + output = [["Character", "⫕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: subsup; with a semi-colon" do + input = "⫓" + output = [["Character", "⫓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: succ; with a semi-colon" do + input = "≻" + output = [["Character", "≻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: succapprox; with a semi-colon" do + input = "⪸" + output = [["Character", "⪸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: succcurlyeq; with a semi-colon" do + input = "≽" + output = [["Character", "≽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: succeq; with a semi-colon" do + input = "⪰" + output = [["Character", "⪰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: succnapprox; with a semi-colon" do + input = "⪺" + output = [["Character", "⪺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: succneqq; with a semi-colon" do + input = "⪶" + output = [["Character", "⪶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: succnsim; with a semi-colon" do + input = "⋩" + output = [["Character", "⋩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: succsim; with a semi-colon" do + input = "≿" + output = [["Character", "≿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sum; with a semi-colon" do + input = "∑" + output = [["Character", "∑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sung; with a semi-colon" do + input = "♪" + output = [["Character", "♪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sup1 without a semi-colon" do + input = "¹" + output = [["Character", "¹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sup1; with a semi-colon" do + input = "¹" + output = [["Character", "¹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sup2 without a semi-colon" do + input = "²" + output = [["Character", "²"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sup2; with a semi-colon" do + input = "²" + output = [["Character", "²"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sup3 without a semi-colon" do + input = "³" + output = [["Character", "³"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sup3; with a semi-colon" do + input = "³" + output = [["Character", "³"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: sup; with a semi-colon" do + input = "⊃" + output = [["Character", "⊃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supE; with a semi-colon" do + input = "⫆" + 
output = [["Character", "⫆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supdot; with a semi-colon" do + input = "⪾" + output = [["Character", "⪾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supdsub; with a semi-colon" do + input = "⫘" + output = [["Character", "⫘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supe; with a semi-colon" do + input = "⊇" + output = [["Character", "⊇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supedot; with a semi-colon" do + input = "⫄" + output = [["Character", "⫄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: suphsol; with a semi-colon" do + input = "⟉" + output = [["Character", "⟉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: suphsub; with a semi-colon" do + input = "⫗" + output = [["Character", "⫗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: suplarr; with a semi-colon" do + input = "⥻" + output = [["Character", "⥻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supmult; with a semi-colon" do + input = "⫂" + output = [["Character", "⫂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supnE; with a semi-colon" do + input = "⫌" + output = [["Character", "⫌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supne; with a semi-colon" do + input = "⊋" + output = [["Character", "⊋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supplus; with a semi-colon" do + input = "⫀" + output = [["Character", "⫀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supset; with a semi-colon" do + input = "⊃" + output = [["Character", "⊃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supseteq; with a semi-colon" do + input = "⊇" + output = [["Character", "⊇"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supseteqq; with a semi-colon" do + input = "⫆" + output = [["Character", "⫆"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supsetneq; with a semi-colon" do + input = "⊋" + output = [["Character", "⊋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supsetneqq; with a semi-colon" do + input = "⫌" + output = [["Character", "⫌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supsim; with a semi-colon" do + input = "⫈" + output = [["Character", "⫈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supsub; with a semi-colon" do + input = "⫔" + output = [["Character", "⫔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: supsup; with a semi-colon" do + input = "⫖" + output = [["Character", "⫖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: swArr; with a semi-colon" do + input = "⇙" + output = [["Character", "⇙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: swarhk; with a semi-colon" do + input = "⤦" + output = [["Character", "⤦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: swarr; with a semi-colon" do + input = "↙" + output = [["Character", "↙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: swarrow; with a semi-colon" do + input = "↙" + output = [["Character", "↙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: swnwar; with a semi-colon" do + input = "⤪" + output = [["Character", "⤪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: szlig without a semi-colon" do + input = "ß" + output = [["Character", "ß"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: szlig; with a semi-colon" do + input = "ß" + output = [["Character", "ß"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: target; with a semi-colon" do + input = "⌖" + output = [["Character", "⌖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tau; with a semi-colon" do + input = "τ" + output = [["Character", "τ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Named entity: tbrk; with a semi-colon" do + input = "⎴" + output = [["Character", "⎴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tcaron; with a semi-colon" do + input = "ť" + output = [["Character", "ť"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part41_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part41_test.exs new file mode 100644 index 00000000..7b088e79 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part41_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart41Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: tcedil; with a semi-colon" do + input = "ţ" + output = [["Character", "ţ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tcy; with a semi-colon" do + input = "т" + output = [["Character", "т"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tdot; with a semi-colon" do + input = "⃛" + output = [["Character", "⃛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: telrec; with a semi-colon" do + input = "⌕" + output = [["Character", "⌕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tfr; with a semi-colon" do + input = "𝔱" + output = [["Character", "𝔱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: there4; with a semi-colon" do + input = "∴" + output = [["Character", "∴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: therefore; with a semi-colon" do + input = "∴" + output = [["Character", "∴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: theta; with a semi-colon" do + input = "θ" + output = [["Character", "θ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thetasym; with a semi-colon" do + input = "ϑ" + output = [["Character", "ϑ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thetav; with a semi-colon" do + input = "ϑ" + output = [["Character", "ϑ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: 
thickapprox; with a semi-colon" do + input = "≈" + output = [["Character", "≈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thicksim; with a semi-colon" do + input = "∼" + output = [["Character", "∼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thinsp; with a semi-colon" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thkap; with a semi-colon" do + input = "≈" + output = [["Character", "≈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thksim; with a semi-colon" do + input = "∼" + output = [["Character", "∼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thorn without a semi-colon" do + input = "þ" + output = [["Character", "þ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: thorn; with a semi-colon" do + input = "þ" + output = [["Character", "þ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tilde; with a semi-colon" do + input = "˜" + output = [["Character", "˜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: times without a semi-colon" do + input = "×" + output = [["Character", "×"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: times; with a semi-colon" do + input = "×" + output = [["Character", "×"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: timesb; with a semi-colon" do + input = "⊠" + output = [["Character", "⊠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: timesbar; with a semi-colon" do + input = "⨱" + output = [["Character", "⨱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: timesd; with a semi-colon" do + input = "⨰" + output = [["Character", "⨰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tint; with a semi-colon" do + input = "∭" + output = [["Character", "∭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: toea; with a semi-colon" do + input = "⤨" + output = [["Character", "⤨"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: top; with a semi-colon" do + input = "⊤" + output = [["Character", "⊤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: topbot; with a semi-colon" do + input = "⌶" + output = [["Character", "⌶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: topcir; with a semi-colon" do + input = "⫱" + output = [["Character", "⫱"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: topf; with a semi-colon" do + input = "𝕥" + output = [["Character", "𝕥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: topfork; with a semi-colon" do + input = "⫚" + output = [["Character", "⫚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tosa; with a semi-colon" do + input = "⤩" + output = [["Character", "⤩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tprime; with a semi-colon" do + input = "‴" + output = [["Character", "‴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: trade; with a semi-colon" do + input = "™" + output = [["Character", "™"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: triangle; with a semi-colon" do + input = "▵" + output = [["Character", "▵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: triangledown; with a semi-colon" do + input = "▿" + output = [["Character", "▿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: triangleleft; with a semi-colon" do + input = "◃" + output = [["Character", "◃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: trianglelefteq; with a semi-colon" do + input = "⊴" + output = [["Character", "⊴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: triangleq; with a semi-colon" do + input = "≜" + output = [["Character", "≜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: triangleright; with a semi-colon" do + input = "▹" + output = [["Character", "▹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Named entity: trianglerighteq; with a semi-colon" do + input = "⊵" + output = [["Character", "⊵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tridot; with a semi-colon" do + input = "◬" + output = [["Character", "◬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: trie; with a semi-colon" do + input = "≜" + output = [["Character", "≜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: triminus; with a semi-colon" do + input = "⨺" + output = [["Character", "⨺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: triplus; with a semi-colon" do + input = "⨹" + output = [["Character", "⨹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: trisb; with a semi-colon" do + input = "⧍" + output = [["Character", "⧍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tritime; with a semi-colon" do + input = "⨻" + output = [["Character", "⨻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: trpezium; with a semi-colon" do + input = "⏢" + output = [["Character", "⏢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tscr; with a semi-colon" do + input = "𝓉" + output = [["Character", "𝓉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tscy; with a semi-colon" do + input = "ц" + output = [["Character", "ц"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tshcy; with a semi-colon" do + input = "ћ" + output = [["Character", "ћ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: tstrok; with a semi-colon" do + input = "ŧ" + output = [["Character", "ŧ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: twixt; with a semi-colon" do + input = "≬" + output = [["Character", "≬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: twoheadleftarrow; with a semi-colon" do + input = "↞" + output = [["Character", "↞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: twoheadrightarrow; with a 
semi-colon" do + input = "↠" + output = [["Character", "↠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uArr; with a semi-colon" do + input = "⇑" + output = [["Character", "⇑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uHar; with a semi-colon" do + input = "⥣" + output = [["Character", "⥣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uacute without a semi-colon" do + input = "ú" + output = [["Character", "ú"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uacute; with a semi-colon" do + input = "ú" + output = [["Character", "ú"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uarr; with a semi-colon" do + input = "↑" + output = [["Character", "↑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ubrcy; with a semi-colon" do + input = "ў" + output = [["Character", "ў"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ubreve; with a semi-colon" do + input = "ŭ" + output = [["Character", "ŭ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ucirc without a semi-colon" do + input = "û" + output = [["Character", "û"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ucirc; with a semi-colon" do + input = "û" + output = [["Character", "û"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ucy; with a semi-colon" do + input = "у" + output = [["Character", "у"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: udarr; with a semi-colon" do + input = "⇅" + output = [["Character", "⇅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: udblac; with a semi-colon" do + input = "ű" + output = [["Character", "ű"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: udhar; with a semi-colon" do + input = "⥮" + output = [["Character", "⥮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ufisht; with a semi-colon" do + input = "⥾" + output = [["Character", "⥾"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ufr; with a semi-colon" do + input = "𝔲" + output = [["Character", "𝔲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ugrave without a semi-colon" do + input = "ù" + output = [["Character", "ù"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ugrave; with a semi-colon" do + input = "ù" + output = [["Character", "ù"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uharl; with a semi-colon" do + input = "↿" + output = [["Character", "↿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uharr; with a semi-colon" do + input = "↾" + output = [["Character", "↾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uhblk; with a semi-colon" do + input = "▀" + output = [["Character", "▀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ulcorn; with a semi-colon" do + input = "⌜" + output = [["Character", "⌜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ulcorner; with a semi-colon" do + input = "⌜" + output = [["Character", "⌜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ulcrop; with a semi-colon" do + input = "⌏" + output = [["Character", "⌏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ultri; with a semi-colon" do + input = "◸" + output = [["Character", "◸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: umacr; with a semi-colon" do + input = "ū" + output = [["Character", "ū"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uml without a semi-colon" do + input = "¨" + output = [["Character", "¨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uml; with a semi-colon" do + input = "¨" + output = [["Character", "¨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uogon; with a semi-colon" do + input = "ų" + output = [["Character", "ų"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: 
uopf; with a semi-colon" do + input = "𝕦" + output = [["Character", "𝕦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uparrow; with a semi-colon" do + input = "↑" + output = [["Character", "↑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: updownarrow; with a semi-colon" do + input = "↕" + output = [["Character", "↕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: upharpoonleft; with a semi-colon" do + input = "↿" + output = [["Character", "↿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: upharpoonright; with a semi-colon" do + input = "↾" + output = [["Character", "↾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uplus; with a semi-colon" do + input = "⊎" + output = [["Character", "⊎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: upsi; with a semi-colon" do + input = "υ" + output = [["Character", "υ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: upsih; with a semi-colon" do + input = "ϒ" + output = [["Character", "ϒ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: upsilon; with a semi-colon" do + input = "υ" + output = [["Character", "υ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: upuparrows; with a semi-colon" do + input = "⇈" + output = [["Character", "⇈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: urcorn; with a semi-colon" do + input = "⌝" + output = [["Character", "⌝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: urcorner; with a semi-colon" do + input = "⌝" + output = [["Character", "⌝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: urcrop; with a semi-colon" do + input = "⌎" + output = [["Character", "⌎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uring; with a semi-colon" do + input = "ů" + output = [["Character", "ů"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: urtri; with a semi-colon" do + input = "◹" + output = [["Character", "◹"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uscr; with a semi-colon" do + input = "𝓊" + output = [["Character", "𝓊"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: utdot; with a semi-colon" do + input = "⋰" + output = [["Character", "⋰"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: utilde; with a semi-colon" do + input = "ũ" + output = [["Character", "ũ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part42_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part42_test.exs new file mode 100644 index 00000000..8f8e6643 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part42_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart42Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: utri; with a semi-colon" do + input = "▵" + output = [["Character", "▵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: utrif; with a semi-colon" do + input = "▴" + output = [["Character", "▴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uuarr; with a semi-colon" do + input = "⇈" + output = [["Character", "⇈"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uuml without a semi-colon" do + input = "ü" + output = [["Character", "ü"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uuml; with a semi-colon" do + input = "ü" + output = [["Character", "ü"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: uwangle; with a semi-colon" do + input = "⦧" + output = [["Character", "⦧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vArr; with a semi-colon" do + input = "⇕" + output = [["Character", "⇕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vBar; with a semi-colon" do + input = "⫨" + output = [["Character", "⫨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vBarv; with a semi-colon" do + input = "⫩" + output = [["Character", "⫩"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vDash; with a semi-colon" do + input = "⊨" + output = [["Character", "⊨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vangrt; with a semi-colon" do + input = "⦜" + output = [["Character", "⦜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varepsilon; with a semi-colon" do + input = "ϵ" + output = [["Character", "ϵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varkappa; with a semi-colon" do + input = "ϰ" + output = [["Character", "ϰ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varnothing; with a semi-colon" do + input = "∅" + output = [["Character", "∅"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varphi; with a semi-colon" do + input = "ϕ" + output = [["Character", "ϕ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varpi; with a semi-colon" do + input = "ϖ" + output = [["Character", "ϖ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varpropto; with a semi-colon" do + input = "∝" + output = [["Character", "∝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varr; with a semi-colon" do + input = "↕" + output = [["Character", "↕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varrho; with a semi-colon" do + input = "ϱ" + output = [["Character", "ϱ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varsigma; with a semi-colon" do + input = "ς" + output = [["Character", "ς"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varsubsetneq; with a semi-colon" do + input = "⊊︀" + output = [["Character", "⊊︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varsubsetneqq; with a semi-colon" do + input = "⫋︀" + output = [["Character", "⫋︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: varsupsetneq; with a semi-colon" do + input = "⊋︀" + output = [["Character", "⊋︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Named entity: varsupsetneqq; with a semi-colon" do + input = "⫌︀" + output = [["Character", "⫌︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vartheta; with a semi-colon" do + input = "ϑ" + output = [["Character", "ϑ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vartriangleleft; with a semi-colon" do + input = "⊲" + output = [["Character", "⊲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vartriangleright; with a semi-colon" do + input = "⊳" + output = [["Character", "⊳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vcy; with a semi-colon" do + input = "в" + output = [["Character", "в"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vdash; with a semi-colon" do + input = "⊢" + output = [["Character", "⊢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vee; with a semi-colon" do + input = "∨" + output = [["Character", "∨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: veebar; with a semi-colon" do + input = "⊻" + output = [["Character", "⊻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: veeeq; with a semi-colon" do + input = "≚" + output = [["Character", "≚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vellip; with a semi-colon" do + input = "⋮" + output = [["Character", "⋮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: verbar; with a semi-colon" do + input = "|" + output = [["Character", "|"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vert; with a semi-colon" do + input = "|" + output = [["Character", "|"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vfr; with a semi-colon" do + input = "𝔳" + output = [["Character", "𝔳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vltri; with a semi-colon" do + input = "⊲" + output = [["Character", "⊲"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vnsub; with a 
semi-colon" do + input = "⊂⃒" + output = [["Character", "⊂⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vnsup; with a semi-colon" do + input = "⊃⃒" + output = [["Character", "⊃⃒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vopf; with a semi-colon" do + input = "𝕧" + output = [["Character", "𝕧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vprop; with a semi-colon" do + input = "∝" + output = [["Character", "∝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vrtri; with a semi-colon" do + input = "⊳" + output = [["Character", "⊳"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vscr; with a semi-colon" do + input = "𝓋" + output = [["Character", "𝓋"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vsubnE; with a semi-colon" do + input = "⫋︀" + output = [["Character", "⫋︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vsubne; with a semi-colon" do + input = "⊊︀" + output = [["Character", "⊊︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vsupnE; with a semi-colon" do + input = "⫌︀" + output = [["Character", "⫌︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vsupne; with a semi-colon" do + input = "⊋︀" + output = [["Character", "⊋︀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: vzigzag; with a semi-colon" do + input = "⦚" + output = [["Character", "⦚"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wcirc; with a semi-colon" do + input = "ŵ" + output = [["Character", "ŵ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wedbar; with a semi-colon" do + input = "⩟" + output = [["Character", "⩟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wedge; with a semi-colon" do + input = "∧" + output = [["Character", "∧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wedgeq; with a semi-colon" do + input = "≙" + output = [["Character", "≙"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: weierp; with a semi-colon" do + input = "℘" + output = [["Character", "℘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wfr; with a semi-colon" do + input = "𝔴" + output = [["Character", "𝔴"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wopf; with a semi-colon" do + input = "𝕨" + output = [["Character", "𝕨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wp; with a semi-colon" do + input = "℘" + output = [["Character", "℘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wr; with a semi-colon" do + input = "≀" + output = [["Character", "≀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wreath; with a semi-colon" do + input = "≀" + output = [["Character", "≀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: wscr; with a semi-colon" do + input = "𝓌" + output = [["Character", "𝓌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xcap; with a semi-colon" do + input = "⋂" + output = [["Character", "⋂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xcirc; with a semi-colon" do + input = "◯" + output = [["Character", "◯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xcup; with a semi-colon" do + input = "⋃" + output = [["Character", "⋃"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xdtri; with a semi-colon" do + input = "▽" + output = [["Character", "▽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xfr; with a semi-colon" do + input = "𝔵" + output = [["Character", "𝔵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xhArr; with a semi-colon" do + input = "⟺" + output = [["Character", "⟺"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xharr; with a semi-colon" do + input = "⟷" + output = [["Character", "⟷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named 
entity: xi; with a semi-colon" do + input = "ξ" + output = [["Character", "ξ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xlArr; with a semi-colon" do + input = "⟸" + output = [["Character", "⟸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xlarr; with a semi-colon" do + input = "⟵" + output = [["Character", "⟵"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xmap; with a semi-colon" do + input = "⟼" + output = [["Character", "⟼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xnis; with a semi-colon" do + input = "⋻" + output = [["Character", "⋻"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xodot; with a semi-colon" do + input = "⨀" + output = [["Character", "⨀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xopf; with a semi-colon" do + input = "𝕩" + output = [["Character", "𝕩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xoplus; with a semi-colon" do + input = "⨁" + output = [["Character", "⨁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xotime; with a semi-colon" do + input = "⨂" + output = [["Character", "⨂"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xrArr; with a semi-colon" do + input = "⟹" + output = [["Character", "⟹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xrarr; with a semi-colon" do + input = "⟶" + output = [["Character", "⟶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xscr; with a semi-colon" do + input = "𝓍" + output = [["Character", "𝓍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xsqcup; with a semi-colon" do + input = "⨆" + output = [["Character", "⨆"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xuplus; with a semi-colon" do + input = "⨄" + output = [["Character", "⨄"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xutri; with a semi-colon" do + input = "△" + output = [["Character", "△"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xvee; with a semi-colon" do + input = "⋁" + output = [["Character", "⋁"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: xwedge; with a semi-colon" do + input = "⋀" + output = [["Character", "⋀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yacute without a semi-colon" do + input = "ý" + output = [["Character", "ý"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yacute; with a semi-colon" do + input = "ý" + output = [["Character", "ý"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yacy; with a semi-colon" do + input = "я" + output = [["Character", "я"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ycirc; with a semi-colon" do + input = "ŷ" + output = [["Character", "ŷ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: ycy; with a semi-colon" do + input = "ы" + output = [["Character", "ы"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yen without a semi-colon" do + input = "¥" + output = [["Character", "¥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yen; with a semi-colon" do + input = "¥" + output = [["Character", "¥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yfr; with a semi-colon" do + input = "𝔶" + output = [["Character", "𝔶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yicy; with a semi-colon" do + input = "ї" + output = [["Character", "ї"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yopf; with a semi-colon" do + input = "𝕪" + output = [["Character", "𝕪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yscr; with a semi-colon" do + input = "𝓎" + output = [["Character", "𝓎"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yucy; with a semi-colon" do + input = "ю" + output = [["Character", "ю"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Named entity: yuml without a semi-colon" do + input = "ÿ" + output = [["Character", "ÿ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: yuml; with a semi-colon" do + input = "ÿ" + output = [["Character", "ÿ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zacute; with a semi-colon" do + input = "ź" + output = [["Character", "ź"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zcaron; with a semi-colon" do + input = "ž" + output = [["Character", "ž"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zcy; with a semi-colon" do + input = "з" + output = [["Character", "з"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part43_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part43_test.exs new file mode 100644 index 00000000..08d56c77 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part43_test.exs @@ -0,0 +1,128 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart43Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Named entity: zdot; with a semi-colon" do + input = "ż" + output = [["Character", "ż"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zeetrf; with a semi-colon" do + input = "ℨ" + output = [["Character", "ℨ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zeta; with a semi-colon" do + input = "ζ" + output = [["Character", "ζ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zfr; with a semi-colon" do + input = "𝔷" + output = [["Character", "𝔷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zhcy; with a semi-colon" do + input = "ж" + output = [["Character", "ж"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zigrarr; with a semi-colon" do + input = "⇝" + output = [["Character", "⇝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zopf; with a semi-colon" do + input = "𝕫" + output = [["Character", "𝕫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zscr; with a 
semi-colon" do + input = "𝓏" + output = [["Character", "𝓏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zwj; with a semi-colon" do + input = "‍" + output = [["Character", "‍"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Named entity: zwnj; with a semi-colon" do + input = "‌" + output = [["Character", "‌"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part4_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part4_test.exs new file mode 100644 index 00000000..21b96afe --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part4_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart4Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: NotGreaterLess without a semi-colon" do + input = "&NotGreaterLess" + output = [["Character", "&NotGreaterLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotGreaterSlantEqual without a semi-colon" do + input = "&NotGreaterSlantEqual" + output = [["Character", "&NotGreaterSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotGreaterTilde without a semi-colon" do + input = "&NotGreaterTilde" + output = [["Character", "&NotGreaterTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotHumpDownHump without a semi-colon" do + input = "&NotHumpDownHump" + output = [["Character", "&NotHumpDownHump"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotHumpEqual without a semi-colon" do + input = "&NotHumpEqual" + output = [["Character", "&NotHumpEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLeftTriangle without a semi-colon" do + input = "&NotLeftTriangle" + output = [["Character", "&NotLeftTriangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLeftTriangleBar without a semi-colon" do + input = "&NotLeftTriangleBar" + output = [["Character", "&NotLeftTriangleBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLeftTriangleEqual without a semi-colon" do + input = "&NotLeftTriangleEqual" + output = [["Character", "&NotLeftTriangleEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLess without a semi-colon" do + input = "&NotLess" + output = [["Character", "&NotLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLessEqual without a semi-colon" do + input = "&NotLessEqual" + output = [["Character", "&NotLessEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLessGreater without a semi-colon" do + input = "&NotLessGreater" + output = [["Character", "&NotLessGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLessLess without a semi-colon" do + input = "&NotLessLess" + output = [["Character", "&NotLessLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLessSlantEqual without a semi-colon" do + input = "&NotLessSlantEqual" + output = [["Character", "&NotLessSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotLessTilde without a semi-colon" do + input = "&NotLessTilde" + output = [["Character", "&NotLessTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotNestedGreaterGreater without a semi-colon" do + input = "&NotNestedGreaterGreater" + output = [["Character", "&NotNestedGreaterGreater"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotNestedLessLess without a semi-colon" do + input = "&NotNestedLessLess" + output = [["Character", "&NotNestedLessLess"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotPrecedes without a semi-colon" do + input = "&NotPrecedes" + output = [["Character", "&NotPrecedes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotPrecedesEqual without a semi-colon" do + input = "&NotPrecedesEqual" + output = [["Character", "&NotPrecedesEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotPrecedesSlantEqual without a semi-colon" do + input = "&NotPrecedesSlantEqual" + output = [["Character", "&NotPrecedesSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotReverseElement without a semi-colon" do + input = "&NotReverseElement" + output = [["Character", "&NotReverseElement"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens 
== output + end + + test "tokenize/1 Bad named entity: NotRightTriangle without a semi-colon" do + input = "&NotRightTriangle" + output = [["Character", "&NotRightTriangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotRightTriangleBar without a semi-colon" do + input = "&NotRightTriangleBar" + output = [["Character", "&NotRightTriangleBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotRightTriangleEqual without a semi-colon" do + input = "&NotRightTriangleEqual" + output = [["Character", "&NotRightTriangleEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSquareSubset without a semi-colon" do + input = "&NotSquareSubset" + output = [["Character", "&NotSquareSubset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSquareSubsetEqual without a semi-colon" do + input = "&NotSquareSubsetEqual" + output = [["Character", "&NotSquareSubsetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSquareSuperset without a semi-colon" do + input = "&NotSquareSuperset" + output = [["Character", "&NotSquareSuperset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSquareSupersetEqual without a semi-colon" do + input = "&NotSquareSupersetEqual" + output = [["Character", "&NotSquareSupersetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSubset without a semi-colon" do + input = "&NotSubset" + output = [["Character", "&NotSubset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSubsetEqual without a semi-colon" do + input = "&NotSubsetEqual" + output = [["Character", "&NotSubsetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSucceeds without a semi-colon" do + input = "&NotSucceeds" + output = [["Character", "&NotSucceeds"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSucceedsEqual without a semi-colon" do + input = "&NotSucceedsEqual" + output = [["Character", "&NotSucceedsEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSucceedsSlantEqual without a semi-colon" do + input = "&NotSucceedsSlantEqual" + output = [["Character", "&NotSucceedsSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens 
== output + end + + test "tokenize/1 Bad named entity: NotSucceedsTilde without a semi-colon" do + input = "&NotSucceedsTilde" + output = [["Character", "&NotSucceedsTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSuperset without a semi-colon" do + input = "&NotSuperset" + output = [["Character", "&NotSuperset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotSupersetEqual without a semi-colon" do + input = "&NotSupersetEqual" + output = [["Character", "&NotSupersetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotTilde without a semi-colon" do + input = "&NotTilde" + output = [["Character", "&NotTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotTildeEqual without a semi-colon" do + input = "&NotTildeEqual" + output = [["Character", "&NotTildeEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotTildeFullEqual without a semi-colon" do + input = "&NotTildeFullEqual" + output = [["Character", "&NotTildeFullEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotTildeTilde without a semi-colon" do + input = "&NotTildeTilde" + output = [["Character", "&NotTildeTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: NotVerticalBar without a semi-colon" do + input = "&NotVerticalBar" + output = [["Character", "&NotVerticalBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Nscr without a semi-colon" do + input = "&Nscr" + output = [["Character", "&Nscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Nu without a semi-colon" do + input = "&Nu" + output = [["Character", "&Nu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: OElig without a semi-colon" do + input = "&OElig" + output = [["Character", "&OElig"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ocy without a semi-colon" do + input = "&Ocy" + output = [["Character", "&Ocy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Odblac without a semi-colon" do + input = "&Odblac" + output = [["Character", "&Odblac"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ofr without a semi-colon" do + input = "&Ofr" + output = [["Character", "&Ofr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Omacr without a semi-colon" do + input = "&Omacr" + output = [["Character", "&Omacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Omega without a semi-colon" do + input = "&Omega" + output = [["Character", "&Omega"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Omicron without a semi-colon" do + input = "&Omicron" + output = [["Character", "&Omicron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Oopf without a semi-colon" do + input = "&Oopf" + output = [["Character", "&Oopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: OpenCurlyDoubleQuote without a semi-colon" do + input = "&OpenCurlyDoubleQuote" + output = [["Character", "&OpenCurlyDoubleQuote"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: OpenCurlyQuote without a semi-colon" do + input = "&OpenCurlyQuote" + output = [["Character", "&OpenCurlyQuote"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Or without a semi-colon" do + input = "&Or" + output = [["Character", "&Or"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Oscr without a semi-colon" do + input = "&Oscr" + output = [["Character", "&Oscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Otimes without a semi-colon" do + input = "&Otimes" + output = [["Character", "&Otimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: OverBar without a semi-colon" do + input = "&OverBar" + output = [["Character", "&OverBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: OverBrace without a semi-colon" do + input = "&OverBrace" + output = [["Character", "&OverBrace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: OverBracket without a semi-colon" do + input = "&OverBracket" + output = [["Character", "&OverBracket"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Bad named entity: OverParenthesis without a semi-colon" do + input = "&OverParenthesis" + output = [["Character", "&OverParenthesis"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: PartialD without a semi-colon" do + input = "&PartialD" + output = [["Character", "&PartialD"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Pcy without a semi-colon" do + input = "&Pcy" + output = [["Character", "&Pcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Pfr without a semi-colon" do + input = "&Pfr" + output = [["Character", "&Pfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Phi without a semi-colon" do + input = "&Phi" + output = [["Character", "&Phi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Pi without a semi-colon" do + input = "&Pi" + output = [["Character", "&Pi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: PlusMinus without a semi-colon" do + input = "&PlusMinus" + output = [["Character", "&PlusMinus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Poincareplane without a semi-colon" do + input = "&Poincareplane" + output = [["Character", "&Poincareplane"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Popf without a semi-colon" do + input = "&Popf" + output = [["Character", "&Popf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Pr without a semi-colon" do + input = "&Pr" + output = [["Character", "&Pr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Precedes without a semi-colon" do + input = "&Precedes" + output = [["Character", "&Precedes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: PrecedesEqual without a semi-colon" do + input = "&PrecedesEqual" + output = [["Character", "&PrecedesEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: PrecedesSlantEqual without a semi-colon" do + input = "&PrecedesSlantEqual" + output = [["Character", "&PrecedesSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
Bad named entity: PrecedesTilde without a semi-colon" do + input = "&PrecedesTilde" + output = [["Character", "&PrecedesTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Prime without a semi-colon" do + input = "&Prime" + output = [["Character", "&Prime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Product without a semi-colon" do + input = "&Product" + output = [["Character", "&Product"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Proportion without a semi-colon" do + input = "&Proportion" + output = [["Character", "&Proportion"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Proportional without a semi-colon" do + input = "&Proportional" + output = [["Character", "&Proportional"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Pscr without a semi-colon" do + input = "&Pscr" + output = [["Character", "&Pscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Psi without a semi-colon" do + input = "&Psi" + output = [["Character", "&Psi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Qfr without a semi-colon" do + input = "&Qfr" + output = [["Character", "&Qfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Qopf without a semi-colon" do + input = "&Qopf" + output = [["Character", "&Qopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Qscr without a semi-colon" do + input = "&Qscr" + output = [["Character", "&Qscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RBarr without a semi-colon" do + input = "&RBarr" + output = [["Character", "&RBarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Racute without a semi-colon" do + input = "&Racute" + output = [["Character", "&Racute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rang without a semi-colon" do + input = "&Rang" + output = [["Character", "&Rang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rarr without a semi-colon" do + input = "&Rarr" + output = [["Character", "&Rarr"]] + + result = + 
input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rarrtl without a semi-colon" do + input = "&Rarrtl" + output = [["Character", "&Rarrtl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rcaron without a semi-colon" do + input = "&Rcaron" + output = [["Character", "&Rcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rcedil without a semi-colon" do + input = "&Rcedil" + output = [["Character", "&Rcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rcy without a semi-colon" do + input = "&Rcy" + output = [["Character", "&Rcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Re without a semi-colon" do + input = "&Re" + output = [["Character", "&Re"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ReverseElement without a semi-colon" do + input = "&ReverseElement" + output = [["Character", "&ReverseElement"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ReverseEquilibrium without a semi-colon" do + input = "&ReverseEquilibrium" + output = [["Character", "&ReverseEquilibrium"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ReverseUpEquilibrium without a semi-colon" do + input = "&ReverseUpEquilibrium" + output = [["Character", "&ReverseUpEquilibrium"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rfr without a semi-colon" do + input = "&Rfr" + output = [["Character", "&Rfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rho without a semi-colon" do + input = "&Rho" + output = [["Character", "&Rho"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightAngleBracket without a semi-colon" do + input = "&RightAngleBracket" + output = [["Character", "&RightAngleBracket"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightArrow without a semi-colon" do + input = "&RightArrow" + output = [["Character", "&RightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightArrowBar without a semi-colon" do + input = "&RightArrowBar" + output = [["Character", "&RightArrowBar"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightArrowLeftArrow without a semi-colon" do + input = "&RightArrowLeftArrow" + output = [["Character", "&RightArrowLeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightCeiling without a semi-colon" do + input = "&RightCeiling" + output = [["Character", "&RightCeiling"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part5_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part5_test.exs new file mode 100644 index 00000000..427f46de --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part5_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart5Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: RightDoubleBracket without a semi-colon" do + input = "&RightDoubleBracket" + output = [["Character", "&RightDoubleBracket"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightDownTeeVector without a semi-colon" do + input = "&RightDownTeeVector" + output = [["Character", "&RightDownTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightDownVector without a semi-colon" do + input = "&RightDownVector" + output = [["Character", "&RightDownVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightDownVectorBar without a semi-colon" do + input = "&RightDownVectorBar" + output = [["Character", "&RightDownVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightFloor without a semi-colon" do + input = "&RightFloor" + output = [["Character", "&RightFloor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightTee without a semi-colon" do + input = "&RightTee" + output = [["Character", "&RightTee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightTeeArrow without a semi-colon" do + input = "&RightTeeArrow" + output = [["Character", "&RightTeeArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightTeeVector without a semi-colon" do + input = "&RightTeeVector" + output = [["Character", "&RightTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightTriangle without a semi-colon" do + input = "&RightTriangle" + output = [["Character", "&RightTriangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightTriangleBar without a semi-colon" do + input = "&RightTriangleBar" + output = [["Character", "&RightTriangleBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightTriangleEqual without a semi-colon" do + input = "&RightTriangleEqual" + output = [["Character", "&RightTriangleEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightUpDownVector without a semi-colon" do + input = "&RightUpDownVector" + output = [["Character", "&RightUpDownVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightUpTeeVector without a semi-colon" do + input = "&RightUpTeeVector" + output = [["Character", "&RightUpTeeVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightUpVector without a semi-colon" do + input = "&RightUpVector" + output = [["Character", "&RightUpVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightUpVectorBar without a semi-colon" do + input = "&RightUpVectorBar" + output = [["Character", "&RightUpVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightVector without a semi-colon" do + input = "&RightVector" + output = [["Character", "&RightVector"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RightVectorBar without a semi-colon" do + input = "&RightVectorBar" + output = [["Character", "&RightVectorBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rightarrow without a semi-colon" do + input = "&Rightarrow" + output = [["Character", "&Rightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ropf without a semi-colon" do + input = "&Ropf" + output = [["Character", "&Ropf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RoundImplies without a semi-colon" do + input = "&RoundImplies" + output = [["Character", "&RoundImplies"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: 
Rrightarrow without a semi-colon" do + input = "&Rrightarrow" + output = [["Character", "&Rrightarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rscr without a semi-colon" do + input = "&Rscr" + output = [["Character", "&Rscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Rsh without a semi-colon" do + input = "&Rsh" + output = [["Character", "&Rsh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: RuleDelayed without a semi-colon" do + input = "&RuleDelayed" + output = [["Character", "&RuleDelayed"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SHCHcy without a semi-colon" do + input = "&SHCHcy" + output = [["Character", "&SHCHcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SHcy without a semi-colon" do + input = "&SHcy" + output = [["Character", "&SHcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SOFTcy without a semi-colon" do + input = "&SOFTcy" + output = [["Character", "&SOFTcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sacute without a semi-colon" do + input = "&Sacute" + output = [["Character", "&Sacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sc without a semi-colon" do + input = "&Sc" + output = [["Character", "&Sc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Scaron without a semi-colon" do + input = "&Scaron" + output = [["Character", "&Scaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Scedil without a semi-colon" do + input = "&Scedil" + output = [["Character", "&Scedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Scirc without a semi-colon" do + input = "&Scirc" + output = [["Character", "&Scirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Scy without a semi-colon" do + input = "&Scy" + output = [["Character", "&Scy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sfr without a semi-colon" do + input = "&Sfr" + output = [["Character", "&Sfr"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ShortDownArrow without a semi-colon" do + input = "&ShortDownArrow" + output = [["Character", "&ShortDownArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ShortLeftArrow without a semi-colon" do + input = "&ShortLeftArrow" + output = [["Character", "&ShortLeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ShortRightArrow without a semi-colon" do + input = "&ShortRightArrow" + output = [["Character", "&ShortRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ShortUpArrow without a semi-colon" do + input = "&ShortUpArrow" + output = [["Character", "&ShortUpArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sigma without a semi-colon" do + input = "&Sigma" + output = [["Character", "&Sigma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SmallCircle without a semi-colon" do + input = "&SmallCircle" + output = [["Character", "&SmallCircle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sopf without a semi-colon" do + input = "&Sopf" + output = [["Character", "&Sopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sqrt without a semi-colon" do + input = "&Sqrt" + output = [["Character", "&Sqrt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Square without a semi-colon" do + input = "&Square" + output = [["Character", "&Square"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SquareIntersection without a semi-colon" do + input = "&SquareIntersection" + output = [["Character", "&SquareIntersection"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SquareSubset without a semi-colon" do + input = "&SquareSubset" + output = [["Character", "&SquareSubset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SquareSubsetEqual without a semi-colon" do + input = "&SquareSubsetEqual" + output = [["Character", "&SquareSubsetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SquareSuperset without a semi-colon" do + input = "&SquareSuperset" + output = 
[["Character", "&SquareSuperset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SquareSupersetEqual without a semi-colon" do + input = "&SquareSupersetEqual" + output = [["Character", "&SquareSupersetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SquareUnion without a semi-colon" do + input = "&SquareUnion" + output = [["Character", "&SquareUnion"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sscr without a semi-colon" do + input = "&Sscr" + output = [["Character", "&Sscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Star without a semi-colon" do + input = "&Star" + output = [["Character", "&Star"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sub without a semi-colon" do + input = "&Sub" + output = [["Character", "&Sub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Subset without a semi-colon" do + input = "&Subset" + output = [["Character", "&Subset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SubsetEqual without a semi-colon" do + input = "&SubsetEqual" + output = [["Character", "&SubsetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Succeeds without a semi-colon" do + input = "&Succeeds" + output = [["Character", "&Succeeds"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SucceedsEqual without a semi-colon" do + input = "&SucceedsEqual" + output = [["Character", "&SucceedsEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SucceedsSlantEqual without a semi-colon" do + input = "&SucceedsSlantEqual" + output = [["Character", "&SucceedsSlantEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SucceedsTilde without a semi-colon" do + input = "&SucceedsTilde" + output = [["Character", "&SucceedsTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SuchThat without a semi-colon" do + input = "&SuchThat" + output = [["Character", "&SuchThat"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sum without a semi-colon" do + input = 
"&Sum" + output = [["Character", "&Sum"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Sup without a semi-colon" do + input = "&Sup" + output = [["Character", "&Sup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Superset without a semi-colon" do + input = "&Superset" + output = [["Character", "&Superset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: SupersetEqual without a semi-colon" do + input = "&SupersetEqual" + output = [["Character", "&SupersetEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Supset without a semi-colon" do + input = "&Supset" + output = [["Character", "&Supset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: TRADE without a semi-colon" do + input = "&TRADE" + output = [["Character", "&TRADE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: TSHcy without a semi-colon" do + input = "&TSHcy" + output = [["Character", "&TSHcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: TScy without a semi-colon" do + input = "&TScy" + output = [["Character", "&TScy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tab without a semi-colon" do + input = "&Tab" + output = [["Character", "&Tab"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tau without a semi-colon" do + input = "&Tau" + output = [["Character", "&Tau"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tcaron without a semi-colon" do + input = "&Tcaron" + output = [["Character", "&Tcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tcedil without a semi-colon" do + input = "&Tcedil" + output = [["Character", "&Tcedil"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tcy without a semi-colon" do + input = "&Tcy" + output = [["Character", "&Tcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tfr without a semi-colon" do + input = "&Tfr" + output = [["Character", "&Tfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Bad named entity: Therefore without a semi-colon" do + input = "&Therefore" + output = [["Character", "&Therefore"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Theta without a semi-colon" do + input = "&Theta" + output = [["Character", "&Theta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ThickSpace without a semi-colon" do + input = "&ThickSpace" + output = [["Character", "&ThickSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ThinSpace without a semi-colon" do + input = "&ThinSpace" + output = [["Character", "&ThinSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tilde without a semi-colon" do + input = "&Tilde" + output = [["Character", "&Tilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: TildeEqual without a semi-colon" do + input = "&TildeEqual" + output = [["Character", "&TildeEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: TildeFullEqual without a semi-colon" do + input = "&TildeFullEqual" + output = [["Character", "&TildeFullEqual"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: TildeTilde without a semi-colon" do + input = "&TildeTilde" + output = [["Character", "&TildeTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Topf without a semi-colon" do + input = "&Topf" + output = [["Character", "&Topf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: TripleDot without a semi-colon" do + input = "&TripleDot" + output = [["Character", "&TripleDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tscr without a semi-colon" do + input = "&Tscr" + output = [["Character", "&Tscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Tstrok without a semi-colon" do + input = "&Tstrok" + output = [["Character", "&Tstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Uarr without a semi-colon" do + input = "&Uarr" + output = [["Character", "&Uarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad 
named entity: Uarrocir without a semi-colon" do + input = "&Uarrocir" + output = [["Character", "&Uarrocir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ubrcy without a semi-colon" do + input = "&Ubrcy" + output = [["Character", "&Ubrcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ubreve without a semi-colon" do + input = "&Ubreve" + output = [["Character", "&Ubreve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ucy without a semi-colon" do + input = "&Ucy" + output = [["Character", "&Ucy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Udblac without a semi-colon" do + input = "&Udblac" + output = [["Character", "&Udblac"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ufr without a semi-colon" do + input = "&Ufr" + output = [["Character", "&Ufr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Umacr without a semi-colon" do + input = "&Umacr" + output = [["Character", "&Umacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UnderBar without a semi-colon" do + input = "&UnderBar" + output = [["Character", "&UnderBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UnderBrace without a semi-colon" do + input = "&UnderBrace" + output = [["Character", "&UnderBrace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UnderBracket without a semi-colon" do + input = "&UnderBracket" + output = [["Character", "&UnderBracket"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UnderParenthesis without a semi-colon" do + input = "&UnderParenthesis" + output = [["Character", "&UnderParenthesis"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Union without a semi-colon" do + input = "&Union" + output = [["Character", "&Union"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UnionPlus without a semi-colon" do + input = "&UnionPlus" + output = [["Character", "&UnionPlus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Uogon without a semi-colon" do + input = "&Uogon" + 
output = [["Character", "&Uogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part6_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part6_test.exs new file mode 100644 index 00000000..6c34fecc --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part6_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart6Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: Uopf without a semi-colon" do + input = "&Uopf" + output = [["Character", "&Uopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpArrow without a semi-colon" do + input = "&UpArrow" + output = [["Character", "&UpArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpArrowBar without a semi-colon" do + input = "&UpArrowBar" + output = [["Character", "&UpArrowBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpArrowDownArrow without a semi-colon" do + input = "&UpArrowDownArrow" + output = [["Character", "&UpArrowDownArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpDownArrow without a semi-colon" do + input = "&UpDownArrow" + output = [["Character", "&UpDownArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpEquilibrium without a semi-colon" do + input = "&UpEquilibrium" + output = [["Character", "&UpEquilibrium"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpTee without a semi-colon" do + input = "&UpTee" + output = [["Character", "&UpTee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpTeeArrow without a semi-colon" do + input = "&UpTeeArrow" + output = [["Character", "&UpTeeArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Uparrow without a semi-colon" do + input = "&Uparrow" + output = [["Character", "&Uparrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Updownarrow without a semi-colon" do + input = "&Updownarrow" + output = [["Character", "&Updownarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpperLeftArrow 
without a semi-colon" do + input = "&UpperLeftArrow" + output = [["Character", "&UpperLeftArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: UpperRightArrow without a semi-colon" do + input = "&UpperRightArrow" + output = [["Character", "&UpperRightArrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Upsi without a semi-colon" do + input = "&Upsi" + output = [["Character", "&Upsi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Upsilon without a semi-colon" do + input = "&Upsilon" + output = [["Character", "&Upsilon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Uring without a semi-colon" do + input = "&Uring" + output = [["Character", "&Uring"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Uscr without a semi-colon" do + input = "&Uscr" + output = [["Character", "&Uscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Utilde without a semi-colon" do + input = "&Utilde" + output = [["Character", "&Utilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: VDash without a semi-colon" do + input = "&VDash" + output = [["Character", "&VDash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vbar without a semi-colon" do + input = "&Vbar" + output = [["Character", "&Vbar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vcy without a semi-colon" do + input = "&Vcy" + output = [["Character", "&Vcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vdash without a semi-colon" do + input = "&Vdash" + output = [["Character", "&Vdash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vdashl without a semi-colon" do + input = "&Vdashl" + output = [["Character", "&Vdashl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vee without a semi-colon" do + input = "&Vee" + output = [["Character", "&Vee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Verbar without a semi-colon" do + input = "&Verbar" + output = [["Character", "&Verbar"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vert without a semi-colon" do + input = "&Vert" + output = [["Character", "&Vert"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: VerticalBar without a semi-colon" do + input = "&VerticalBar" + output = [["Character", "&VerticalBar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: VerticalLine without a semi-colon" do + input = "&VerticalLine" + output = [["Character", "&VerticalLine"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: VerticalSeparator without a semi-colon" do + input = "&VerticalSeparator" + output = [["Character", "&VerticalSeparator"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: VerticalTilde without a semi-colon" do + input = "&VerticalTilde" + output = [["Character", "&VerticalTilde"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: VeryThinSpace without a semi-colon" do + input = "&VeryThinSpace" + output = [["Character", "&VeryThinSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vfr without a semi-colon" do + input = "&Vfr" + output = [["Character", "&Vfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vopf without a semi-colon" do + input = "&Vopf" + output = [["Character", "&Vopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vscr without a semi-colon" do + input = "&Vscr" + output = [["Character", "&Vscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Vvdash without a semi-colon" do + input = "&Vvdash" + output = [["Character", "&Vvdash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Wcirc without a semi-colon" do + input = "&Wcirc" + output = [["Character", "&Wcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Wedge without a semi-colon" do + input = "&Wedge" + output = [["Character", "&Wedge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Wfr without a semi-colon" do + input = "&Wfr" + output = [["Character", "&Wfr"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Wopf without a semi-colon" do + input = "&Wopf" + output = [["Character", "&Wopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Wscr without a semi-colon" do + input = "&Wscr" + output = [["Character", "&Wscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Xfr without a semi-colon" do + input = "&Xfr" + output = [["Character", "&Xfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Xi without a semi-colon" do + input = "&Xi" + output = [["Character", "&Xi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Xopf without a semi-colon" do + input = "&Xopf" + output = [["Character", "&Xopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Xscr without a semi-colon" do + input = "&Xscr" + output = [["Character", "&Xscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: YAcy without a semi-colon" do + input = "&YAcy" + output = [["Character", "&YAcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: YIcy without a semi-colon" do + input = "&YIcy" + output = [["Character", "&YIcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: YUcy without a semi-colon" do + input = "&YUcy" + output = [["Character", "&YUcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ycirc without a semi-colon" do + input = "&Ycirc" + output = [["Character", "&Ycirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Ycy without a semi-colon" do + input = "&Ycy" + output = [["Character", "&Ycy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Yfr without a semi-colon" do + input = "&Yfr" + output = [["Character", "&Yfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Yopf without a semi-colon" do + input = "&Yopf" + output = [["Character", "&Yopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Yscr without a semi-colon" do + input = "&Yscr" + output = [["Character", "&Yscr"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Yuml without a semi-colon" do + input = "&Yuml" + output = [["Character", "&Yuml"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ZHcy without a semi-colon" do + input = "&ZHcy" + output = [["Character", "&ZHcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zacute without a semi-colon" do + input = "&Zacute" + output = [["Character", "&Zacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zcaron without a semi-colon" do + input = "&Zcaron" + output = [["Character", "&Zcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zcy without a semi-colon" do + input = "&Zcy" + output = [["Character", "&Zcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zdot without a semi-colon" do + input = "&Zdot" + output = [["Character", "&Zdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ZeroWidthSpace without a semi-colon" do + input = "&ZeroWidthSpace" + output = [["Character", "&ZeroWidthSpace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zeta without a semi-colon" do + input = "&Zeta" + output = [["Character", "&Zeta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zfr without a semi-colon" do + input = "&Zfr" + output = [["Character", "&Zfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zopf without a semi-colon" do + input = "&Zopf" + output = [["Character", "&Zopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: Zscr without a semi-colon" do + input = "&Zscr" + output = [["Character", "&Zscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: abreve without a semi-colon" do + input = "&abreve" + output = [["Character", "&abreve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ac without a semi-colon" do + input = "&ac" + output = [["Character", "&ac"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named 
entity: acE without a semi-colon" do + input = "&acE" + output = [["Character", "&acE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: acd without a semi-colon" do + input = "&acd" + output = [["Character", "&acd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: acy without a semi-colon" do + input = "&acy" + output = [["Character", "&acy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: af without a semi-colon" do + input = "&af" + output = [["Character", "&af"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: afr without a semi-colon" do + input = "&afr" + output = [["Character", "&afr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: alefsym without a semi-colon" do + input = "&alefsym" + output = [["Character", "&alefsym"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: aleph without a semi-colon" do + input = "&aleph" + output = [["Character", "&aleph"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: alpha without a semi-colon" do + input = "&alpha" + output = [["Character", "&alpha"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: amacr without a semi-colon" do + input = "&amacr" + output = [["Character", "&amacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: amalg without a semi-colon" do + input = "&amalg" + output = [["Character", "&amalg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: and without a semi-colon" do + input = "&and" + output = [["Character", "&and"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: andand without a semi-colon" do + input = "&andand" + output = [["Character", "&andand"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: andd without a semi-colon" do + input = "&andd" + output = [["Character", "&andd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: andslope without a semi-colon" do + input = "&andslope" + output = [["Character", "&andslope"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: andv without a semi-colon" do + input = "&andv" + output = [["Character", "&andv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ang without a semi-colon" do + input = "&ang" + output = [["Character", "&ang"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ange without a semi-colon" do + input = "&ange" + output = [["Character", "&ange"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angle without a semi-colon" do + input = "&angle" + output = [["Character", "&angle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsd without a semi-colon" do + input = "&angmsd" + output = [["Character", "&angmsd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdaa without a semi-colon" do + input = "&angmsdaa" + output = [["Character", "&angmsdaa"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdab without a semi-colon" do + input = "&angmsdab" + output = [["Character", "&angmsdab"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdac without a semi-colon" do + input = "&angmsdac" + output = [["Character", "&angmsdac"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdad without a semi-colon" do + input = "&angmsdad" + output = [["Character", "&angmsdad"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdae without a semi-colon" do + input = "&angmsdae" + output = [["Character", "&angmsdae"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdaf without a semi-colon" do + input = "&angmsdaf" + output = [["Character", "&angmsdaf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdag without a semi-colon" do + input = "&angmsdag" + output = [["Character", "&angmsdag"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angmsdah without a semi-colon" do + input = "&angmsdah" + output = [["Character", "&angmsdah"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Bad named entity: angrt without a semi-colon" do + input = "&angrt" + output = [["Character", "&angrt"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angrtvb without a semi-colon" do + input = "&angrtvb" + output = [["Character", "&angrtvb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angrtvbd without a semi-colon" do + input = "&angrtvbd" + output = [["Character", "&angrtvbd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angsph without a semi-colon" do + input = "&angsph" + output = [["Character", "&angsph"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angst without a semi-colon" do + input = "&angst" + output = [["Character", "&angst"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: angzarr without a semi-colon" do + input = "&angzarr" + output = [["Character", "&angzarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: aogon without a semi-colon" do + input = "&aogon" + output = [["Character", "&aogon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: aopf without a semi-colon" do + input = "&aopf" + output = [["Character", "&aopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ap without a semi-colon" do + input = "&ap" + output = [["Character", "&ap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part7_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part7_test.exs new file mode 100644 index 00000000..af887045 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part7_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart7Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: apE without a semi-colon" do + input = "&apE" + output = [["Character", "&apE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: apacir without a semi-colon" do + input = "&apacir" + output = [["Character", "&apacir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ape without a semi-colon" do + input = "&ape" + output = [["Character", "&ape"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: apid without a semi-colon" do + input = "&apid" + output = [["Character", "&apid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: apos without a semi-colon" do + input = "&apos" + output = [["Character", "&apos"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: approx without a semi-colon" do + input = "&approx" + output = [["Character", "&approx"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: approxeq without a semi-colon" do + input = "&approxeq" + output = [["Character", "&approxeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ascr without a semi-colon" do + input = "&ascr" + output = [["Character", "&ascr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ast without a semi-colon" do + input = "&ast" + output = [["Character", "&ast"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: asymp without a semi-colon" do + input = "&asymp" + output = [["Character", "&asymp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: asympeq without a semi-colon" do + input = "&asympeq" + output = [["Character", "&asympeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: awconint without a semi-colon" do + input = "&awconint" + output = [["Character", "&awconint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: awint without a semi-colon" do + input = "&awint" + output = [["Character", "&awint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bNot without a 
semi-colon" do + input = "&bNot" + output = [["Character", "&bNot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: backcong without a semi-colon" do + input = "&backcong" + output = [["Character", "&backcong"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: backepsilon without a semi-colon" do + input = "&backepsilon" + output = [["Character", "&backepsilon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: backprime without a semi-colon" do + input = "&backprime" + output = [["Character", "&backprime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: backsim without a semi-colon" do + input = "&backsim" + output = [["Character", "&backsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: backsimeq without a semi-colon" do + input = "&backsimeq" + output = [["Character", "&backsimeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: barvee without a semi-colon" do + input = "&barvee" + output = [["Character", "&barvee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: barwed without a semi-colon" do + input = "&barwed" + output = [["Character", "&barwed"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: barwedge without a semi-colon" do + input = "&barwedge" + output = [["Character", "&barwedge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bbrk without a semi-colon" do + input = "&bbrk" + output = [["Character", "&bbrk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bbrktbrk without a semi-colon" do + input = "&bbrktbrk" + output = [["Character", "&bbrktbrk"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bcong without a semi-colon" do + input = "&bcong" + output = [["Character", "&bcong"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bcy without a semi-colon" do + input = "&bcy" + output = [["Character", "&bcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bdquo without a semi-colon" do + input = "&bdquo" + output = [["Character", "&bdquo"]] + + result = + input 
+ |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: becaus without a semi-colon" do + input = "&becaus" + output = [["Character", "&becaus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: because without a semi-colon" do + input = "&because" + output = [["Character", "&because"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bemptyv without a semi-colon" do + input = "&bemptyv" + output = [["Character", "&bemptyv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bepsi without a semi-colon" do + input = "&bepsi" + output = [["Character", "&bepsi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bernou without a semi-colon" do + input = "&bernou" + output = [["Character", "&bernou"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: beta without a semi-colon" do + input = "&beta" + output = [["Character", "&beta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: beth without a semi-colon" do + input = "&beth" + output = [["Character", "&beth"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: between without a semi-colon" do + input = "&between" + output = [["Character", "&between"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bfr without a semi-colon" do + input = "&bfr" + output = [["Character", "&bfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigcap without a semi-colon" do + input = "&bigcap" + output = [["Character", "&bigcap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigcirc without a semi-colon" do + input = "&bigcirc" + output = [["Character", "&bigcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigcup without a semi-colon" do + input = "&bigcup" + output = [["Character", "&bigcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigodot without a semi-colon" do + input = "&bigodot" + output = [["Character", "&bigodot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Bad named entity: bigoplus without a semi-colon" do + input = "&bigoplus" + output = [["Character", "&bigoplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigotimes without a semi-colon" do + input = "&bigotimes" + output = [["Character", "&bigotimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigsqcup without a semi-colon" do + input = "&bigsqcup" + output = [["Character", "&bigsqcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigstar without a semi-colon" do + input = "&bigstar" + output = [["Character", "&bigstar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigtriangledown without a semi-colon" do + input = "&bigtriangledown" + output = [["Character", "&bigtriangledown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigtriangleup without a semi-colon" do + input = "&bigtriangleup" + output = [["Character", "&bigtriangleup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: biguplus without a semi-colon" do + input = "&biguplus" + output = [["Character", "&biguplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigvee without a semi-colon" do + input = "&bigvee" + output = [["Character", "&bigvee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bigwedge without a semi-colon" do + input = "&bigwedge" + output = [["Character", "&bigwedge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bkarow without a semi-colon" do + input = "&bkarow" + output = [["Character", "&bkarow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blacklozenge without a semi-colon" do + input = "&blacklozenge" + output = [["Character", "&blacklozenge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blacksquare without a semi-colon" do + input = "&blacksquare" + output = [["Character", "&blacksquare"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blacktriangle without a semi-colon" do + input = "&blacktriangle" + output = [["Character", "&blacktriangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output 
+ end + + test "tokenize/1 Bad named entity: blacktriangledown without a semi-colon" do + input = "&blacktriangledown" + output = [["Character", "&blacktriangledown"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blacktriangleleft without a semi-colon" do + input = "&blacktriangleleft" + output = [["Character", "&blacktriangleleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blacktriangleright without a semi-colon" do + input = "&blacktriangleright" + output = [["Character", "&blacktriangleright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blank without a semi-colon" do + input = "&blank" + output = [["Character", "&blank"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blk12 without a semi-colon" do + input = "&blk12" + output = [["Character", "&blk12"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blk14 without a semi-colon" do + input = "&blk14" + output = [["Character", "&blk14"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: blk34 without a semi-colon" do + input = "&blk34" + output = [["Character", "&blk34"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: block without a semi-colon" do + input = "&block" + output = [["Character", "&block"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bne without a semi-colon" do + input = "&bne" + output = [["Character", "&bne"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bnequiv without a semi-colon" do + input = "&bnequiv" + output = [["Character", "&bnequiv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bnot without a semi-colon" do + input = "&bnot" + output = [["Character", "&bnot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bopf without a semi-colon" do + input = "&bopf" + output = [["Character", "&bopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bot without a semi-colon" do + input = "&bot" + output = [["Character", "&bot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bottom without a 
semi-colon" do + input = "&bottom" + output = [["Character", "&bottom"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bowtie without a semi-colon" do + input = "&bowtie" + output = [["Character", "&bowtie"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxDL without a semi-colon" do + input = "&boxDL" + output = [["Character", "&boxDL"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxDR without a semi-colon" do + input = "&boxDR" + output = [["Character", "&boxDR"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxDl without a semi-colon" do + input = "&boxDl" + output = [["Character", "&boxDl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxDr without a semi-colon" do + input = "&boxDr" + output = [["Character", "&boxDr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxH without a semi-colon" do + input = "&boxH" + output = [["Character", "&boxH"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxHD without a semi-colon" do + input = "&boxHD" + output = [["Character", "&boxHD"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxHU without a semi-colon" do + input = "&boxHU" + output = [["Character", "&boxHU"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxHd without a semi-colon" do + input = "&boxHd" + output = [["Character", "&boxHd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxHu without a semi-colon" do + input = "&boxHu" + output = [["Character", "&boxHu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxUL without a semi-colon" do + input = "&boxUL" + output = [["Character", "&boxUL"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxUR without a semi-colon" do + input = "&boxUR" + output = [["Character", "&boxUR"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxUl without a semi-colon" do + input = "&boxUl" + output = [["Character", "&boxUl"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxUr without a semi-colon" do + input = "&boxUr" + output = [["Character", "&boxUr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxV without a semi-colon" do + input = "&boxV" + output = [["Character", "&boxV"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxVH without a semi-colon" do + input = "&boxVH" + output = [["Character", "&boxVH"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxVL without a semi-colon" do + input = "&boxVL" + output = [["Character", "&boxVL"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxVR without a semi-colon" do + input = "&boxVR" + output = [["Character", "&boxVR"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxVh without a semi-colon" do + input = "&boxVh" + output = [["Character", "&boxVh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxVl without a semi-colon" do + input = "&boxVl" + output = [["Character", "&boxVl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxVr without a semi-colon" do + input = "&boxVr" + output = [["Character", "&boxVr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxbox without a semi-colon" do + input = "&boxbox" + output = [["Character", "&boxbox"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxdL without a semi-colon" do + input = "&boxdL" + output = [["Character", "&boxdL"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxdR without a semi-colon" do + input = "&boxdR" + output = [["Character", "&boxdR"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxdl without a semi-colon" do + input = "&boxdl" + output = [["Character", "&boxdl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxdr without a semi-colon" do + input = "&boxdr" + output = [["Character", "&boxdr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxh without a semi-colon" do + input = 
"&boxh" + output = [["Character", "&boxh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxhD without a semi-colon" do + input = "&boxhD" + output = [["Character", "&boxhD"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxhU without a semi-colon" do + input = "&boxhU" + output = [["Character", "&boxhU"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxhd without a semi-colon" do + input = "&boxhd" + output = [["Character", "&boxhd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxhu without a semi-colon" do + input = "&boxhu" + output = [["Character", "&boxhu"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxminus without a semi-colon" do + input = "&boxminus" + output = [["Character", "&boxminus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxplus without a semi-colon" do + input = "&boxplus" + output = [["Character", "&boxplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part8_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part8_test.exs new file mode 100644 index 00000000..0ae4c8fc --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part8_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart8Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: boxtimes without a semi-colon" do + input = "&boxtimes" + output = [["Character", "&boxtimes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxuL without a semi-colon" do + input = "&boxuL" + output = [["Character", "&boxuL"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxuR without a semi-colon" do + input = "&boxuR" + output = [["Character", "&boxuR"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxul without a semi-colon" do + input = "&boxul" + output = [["Character", "&boxul"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxur without a semi-colon" do + input = "&boxur" + output = [["Character", "&boxur"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxv without a semi-colon" do + input = "&boxv" + output = [["Character", "&boxv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxvH without a semi-colon" do + input = "&boxvH" + output = [["Character", "&boxvH"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxvL without a semi-colon" do + input = "&boxvL" + output = [["Character", "&boxvL"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxvR without a semi-colon" do + input = "&boxvR" + output = [["Character", "&boxvR"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxvh without a semi-colon" do + input = "&boxvh" + output = [["Character", "&boxvh"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxvl without a semi-colon" do + input = "&boxvl" + output = [["Character", "&boxvl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: boxvr without a semi-colon" do + input = "&boxvr" + output = [["Character", "&boxvr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bprime without a semi-colon" do + input = "&bprime" + output = [["Character", "&bprime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: breve without a 
semi-colon" do + input = "&breve" + output = [["Character", "&breve"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bscr without a semi-colon" do + input = "&bscr" + output = [["Character", "&bscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bsemi without a semi-colon" do + input = "&bsemi" + output = [["Character", "&bsemi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bsim without a semi-colon" do + input = "&bsim" + output = [["Character", "&bsim"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bsime without a semi-colon" do + input = "&bsime" + output = [["Character", "&bsime"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bsol without a semi-colon" do + input = "&bsol" + output = [["Character", "&bsol"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bsolb without a semi-colon" do + input = "&bsolb" + output = [["Character", "&bsolb"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bsolhsub without a semi-colon" do + input = "&bsolhsub" + output = [["Character", "&bsolhsub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bull without a semi-colon" do + input = "&bull" + output = [["Character", "&bull"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bullet without a semi-colon" do + input = "&bullet" + output = [["Character", "&bullet"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bump without a semi-colon" do + input = "&bump" + output = [["Character", "&bump"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bumpE without a semi-colon" do + input = "&bumpE" + output = [["Character", "&bumpE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bumpe without a semi-colon" do + input = "&bumpe" + output = [["Character", "&bumpe"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: bumpeq without a semi-colon" do + input = "&bumpeq" + output = [["Character", "&bumpeq"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cacute without a semi-colon" do + input = "&cacute" + output = [["Character", "&cacute"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cap without a semi-colon" do + input = "&cap" + output = [["Character", "&cap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: capand without a semi-colon" do + input = "&capand" + output = [["Character", "&capand"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: capbrcup without a semi-colon" do + input = "&capbrcup" + output = [["Character", "&capbrcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: capcap without a semi-colon" do + input = "&capcap" + output = [["Character", "&capcap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: capcup without a semi-colon" do + input = "&capcup" + output = [["Character", "&capcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: capdot without a semi-colon" do + input = "&capdot" + output = [["Character", "&capdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: caps without a semi-colon" do + input = "&caps" + output = [["Character", "&caps"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: caret without a semi-colon" do + input = "&caret" + output = [["Character", "&caret"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: caron without a semi-colon" do + input = "&caron" + output = [["Character", "&caron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ccaps without a semi-colon" do + input = "&ccaps" + output = [["Character", "&ccaps"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ccaron without a semi-colon" do + input = "&ccaron" + output = [["Character", "&ccaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ccirc without a semi-colon" do + input = "&ccirc" + output = [["Character", "&ccirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ccups without a 
semi-colon" do + input = "&ccups" + output = [["Character", "&ccups"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ccupssm without a semi-colon" do + input = "&ccupssm" + output = [["Character", "&ccupssm"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cdot without a semi-colon" do + input = "&cdot" + output = [["Character", "&cdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cemptyv without a semi-colon" do + input = "&cemptyv" + output = [["Character", "&cemptyv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cfr without a semi-colon" do + input = "&cfr" + output = [["Character", "&cfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: chcy without a semi-colon" do + input = "&chcy" + output = [["Character", "&chcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: check without a semi-colon" do + input = "&check" + output = [["Character", "&check"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: checkmark without a semi-colon" do + input = "&checkmark" + output = [["Character", "&checkmark"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: chi without a semi-colon" do + input = "&chi" + output = [["Character", "&chi"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cir without a semi-colon" do + input = "&cir" + output = [["Character", "&cir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cirE without a semi-colon" do + input = "&cirE" + output = [["Character", "&cirE"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circ without a semi-colon" do + input = "&circ" + output = [["Character", "&circ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circeq without a semi-colon" do + input = "&circeq" + output = [["Character", "&circeq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circlearrowleft without a semi-colon" do + input = "&circlearrowleft" + output = [["Character", "&circlearrowleft"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circlearrowright without a semi-colon" do + input = "&circlearrowright" + output = [["Character", "&circlearrowright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circledR without a semi-colon" do + input = "&circledR" + output = [["Character", "&circledR"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circledS without a semi-colon" do + input = "&circledS" + output = [["Character", "&circledS"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circledast without a semi-colon" do + input = "&circledast" + output = [["Character", "&circledast"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circledcirc without a semi-colon" do + input = "&circledcirc" + output = [["Character", "&circledcirc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: circleddash without a semi-colon" do + input = "&circleddash" + output = [["Character", "&circleddash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cire without a semi-colon" do + input = "&cire" + output = [["Character", "&cire"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cirfnint without a semi-colon" do + input = "&cirfnint" + output = [["Character", "&cirfnint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cirmid without a semi-colon" do + input = "&cirmid" + output = [["Character", "&cirmid"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cirscir without a semi-colon" do + input = "&cirscir" + output = [["Character", "&cirscir"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: clubs without a semi-colon" do + input = "&clubs" + output = [["Character", "&clubs"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: clubsuit without a semi-colon" do + input = "&clubsuit" + output = [["Character", "&clubsuit"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: colon without a semi-colon" do + input = "&colon" + output = [["Character", "&colon"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: colone without a semi-colon" do + input = "&colone" + output = [["Character", "&colone"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: coloneq without a semi-colon" do + input = "&coloneq" + output = [["Character", "&coloneq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: comma without a semi-colon" do + input = "&comma" + output = [["Character", "&comma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: commat without a semi-colon" do + input = "&commat" + output = [["Character", "&commat"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: comp without a semi-colon" do + input = "&comp" + output = [["Character", "&comp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: compfn without a semi-colon" do + input = "&compfn" + output = [["Character", "&compfn"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: complement without a semi-colon" do + input = "&complement" + output = [["Character", "&complement"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: complexes without a semi-colon" do + input = "&complexes" + output = [["Character", "&complexes"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cong without a semi-colon" do + input = "&cong" + output = [["Character", "&cong"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: congdot without a semi-colon" do + input = "&congdot" + output = [["Character", "&congdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: conint without a semi-colon" do + input = "&conint" + output = [["Character", "&conint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: copf without a semi-colon" do + input = "&copf" + output = [["Character", "&copf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: coprod without a semi-colon" do + input = "&coprod" + output = [["Character", "&coprod"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named 
entity: crarr without a semi-colon" do + input = "&crarr" + output = [["Character", "&crarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cross without a semi-colon" do + input = "&cross" + output = [["Character", "&cross"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cscr without a semi-colon" do + input = "&cscr" + output = [["Character", "&cscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: csub without a semi-colon" do + input = "&csub" + output = [["Character", "&csub"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: csube without a semi-colon" do + input = "&csube" + output = [["Character", "&csube"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: csup without a semi-colon" do + input = "&csup" + output = [["Character", "&csup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: csupe without a semi-colon" do + input = "&csupe" + output = [["Character", "&csupe"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ctdot without a semi-colon" do + input = "&ctdot" + output = [["Character", "&ctdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cudarrl without a semi-colon" do + input = "&cudarrl" + output = [["Character", "&cudarrl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cudarrr without a semi-colon" do + input = "&cudarrr" + output = [["Character", "&cudarrr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cuepr without a semi-colon" do + input = "&cuepr" + output = [["Character", "&cuepr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cuesc without a semi-colon" do + input = "&cuesc" + output = [["Character", "&cuesc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cularr without a semi-colon" do + input = "&cularr" + output = [["Character", "&cularr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cularrp without a semi-colon" do + input = "&cularrp" + output = [["Character", "&cularrp"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cup without a semi-colon" do + input = "&cup" + output = [["Character", "&cup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cupbrcap without a semi-colon" do + input = "&cupbrcap" + output = [["Character", "&cupbrcap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cupcap without a semi-colon" do + input = "&cupcap" + output = [["Character", "&cupcap"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cupcup without a semi-colon" do + input = "&cupcup" + output = [["Character", "&cupcup"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cupdot without a semi-colon" do + input = "&cupdot" + output = [["Character", "&cupdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cupor without a semi-colon" do + input = "&cupor" + output = [["Character", "&cupor"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/namedEntities_part9_test.exs b/test/floki/html/generated/tokenizer/namedEntities_part9_test.exs new file mode 100644 index 00000000..2fd60381 --- /dev/null +++ b/test/floki/html/generated/tokenizer/namedEntities_part9_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NamedentitiesPart9Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests namedEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Bad named entity: cups without a semi-colon" do + input = "&cups" + output = [["Character", "&cups"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curarr without a semi-colon" do + input = "&curarr" + output = [["Character", "&curarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curarrm without a semi-colon" do + input = "&curarrm" + output = [["Character", "&curarrm"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curlyeqprec without a semi-colon" do + input = "&curlyeqprec" + output = [["Character", "&curlyeqprec"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curlyeqsucc without a semi-colon" do + input = "&curlyeqsucc" + output = [["Character", "&curlyeqsucc"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curlyvee without a semi-colon" do + input = "&curlyvee" + output = [["Character", "&curlyvee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curlywedge without a semi-colon" do + input = "&curlywedge" + output = [["Character", "&curlywedge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curvearrowleft without a semi-colon" do + input = "&curvearrowleft" + output = [["Character", "&curvearrowleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: curvearrowright without a semi-colon" do + input = "&curvearrowright" + output = [["Character", "&curvearrowright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cuvee without a semi-colon" do + input = "&cuvee" + output = [["Character", "&cuvee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cuwed without a semi-colon" do + input = "&cuwed" + output = [["Character", "&cuwed"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cwconint without a semi-colon" do + input = "&cwconint" + output = [["Character", "&cwconint"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cwint without a semi-colon" do + input = "&cwint" + output = [["Character", "&cwint"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: cylcty without a semi-colon" do + input = "&cylcty" + output = [["Character", "&cylcty"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dArr without a semi-colon" do + input = "&dArr" + output = [["Character", "&dArr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dHar without a semi-colon" do + input = "&dHar" + output = [["Character", "&dHar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dagger without a semi-colon" do + input = "&dagger" + output = [["Character", "&dagger"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: daleth without a semi-colon" do + input = "&daleth" + output = [["Character", "&daleth"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: darr without a semi-colon" do + input = "&darr" + output = [["Character", "&darr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dash without a semi-colon" do + input = "&dash" + output = [["Character", "&dash"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dashv without a semi-colon" do + input = "&dashv" + output = [["Character", "&dashv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dbkarow without a semi-colon" do + input = "&dbkarow" + output = [["Character", "&dbkarow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dblac without a semi-colon" do + input = "&dblac" + output = [["Character", "&dblac"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dcaron without a semi-colon" do + input = "&dcaron" + output = [["Character", "&dcaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dcy without a semi-colon" do + input = "&dcy" + output = [["Character", "&dcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dd without a semi-colon" do + input = "&dd" + output = [["Character", "&dd"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ddagger without a semi-colon" do + input = 
"&ddagger" + output = [["Character", "&ddagger"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ddarr without a semi-colon" do + input = "&ddarr" + output = [["Character", "&ddarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ddotseq without a semi-colon" do + input = "&ddotseq" + output = [["Character", "&ddotseq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: delta without a semi-colon" do + input = "&delta" + output = [["Character", "&delta"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: demptyv without a semi-colon" do + input = "&demptyv" + output = [["Character", "&demptyv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dfisht without a semi-colon" do + input = "&dfisht" + output = [["Character", "&dfisht"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dfr without a semi-colon" do + input = "&dfr" + output = [["Character", "&dfr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dharl without a semi-colon" do + input = "&dharl" + output = [["Character", "&dharl"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dharr without a semi-colon" do + input = "&dharr" + output = [["Character", "&dharr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: diam without a semi-colon" do + input = "&diam" + output = [["Character", "&diam"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: diamond without a semi-colon" do + input = "&diamond" + output = [["Character", "&diamond"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: diamondsuit without a semi-colon" do + input = "&diamondsuit" + output = [["Character", "&diamondsuit"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: diams without a semi-colon" do + input = "&diams" + output = [["Character", "&diams"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: die without a semi-colon" do + input = "&die" + output = [["Character", "&die"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: digamma without a semi-colon" do + input = "&digamma" + output = [["Character", "&digamma"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: disin without a semi-colon" do + input = "&disin" + output = [["Character", "&disin"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: div without a semi-colon" do + input = "&div" + output = [["Character", "&div"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: divonx without a semi-colon" do + input = "&divonx" + output = [["Character", "&divonx"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: djcy without a semi-colon" do + input = "&djcy" + output = [["Character", "&djcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dlcorn without a semi-colon" do + input = "&dlcorn" + output = [["Character", "&dlcorn"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dlcrop without a semi-colon" do + input = "&dlcrop" + output = [["Character", "&dlcrop"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dollar without a semi-colon" do + input = "&dollar" + output = [["Character", "&dollar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dopf without a semi-colon" do + input = "&dopf" + output = [["Character", "&dopf"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dot without a semi-colon" do + input = "&dot" + output = [["Character", "&dot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: doteq without a semi-colon" do + input = "&doteq" + output = [["Character", "&doteq"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: doteqdot without a semi-colon" do + input = "&doteqdot" + output = [["Character", "&doteqdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dotminus without a semi-colon" do + input = "&dotminus" + output = [["Character", "&dotminus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dotplus without a 
semi-colon" do + input = "&dotplus" + output = [["Character", "&dotplus"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dotsquare without a semi-colon" do + input = "&dotsquare" + output = [["Character", "&dotsquare"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: doublebarwedge without a semi-colon" do + input = "&doublebarwedge" + output = [["Character", "&doublebarwedge"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: downarrow without a semi-colon" do + input = "&downarrow" + output = [["Character", "&downarrow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: downdownarrows without a semi-colon" do + input = "&downdownarrows" + output = [["Character", "&downdownarrows"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: downharpoonleft without a semi-colon" do + input = "&downharpoonleft" + output = [["Character", "&downharpoonleft"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: downharpoonright without a semi-colon" do + input = "&downharpoonright" + output = [["Character", "&downharpoonright"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: drbkarow without a semi-colon" do + input = "&drbkarow" + output = [["Character", "&drbkarow"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: drcorn without a semi-colon" do + input = "&drcorn" + output = [["Character", "&drcorn"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: drcrop without a semi-colon" do + input = "&drcrop" + output = [["Character", "&drcrop"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dscr without a semi-colon" do + input = "&dscr" + output = [["Character", "&dscr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dscy without a semi-colon" do + input = "&dscy" + output = [["Character", "&dscy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dsol without a semi-colon" do + input = "&dsol" + output = [["Character", "&dsol"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dstrok without a 
semi-colon" do + input = "&dstrok" + output = [["Character", "&dstrok"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dtdot without a semi-colon" do + input = "&dtdot" + output = [["Character", "&dtdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dtri without a semi-colon" do + input = "&dtri" + output = [["Character", "&dtri"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dtrif without a semi-colon" do + input = "&dtrif" + output = [["Character", "&dtrif"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: duarr without a semi-colon" do + input = "&duarr" + output = [["Character", "&duarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: duhar without a semi-colon" do + input = "&duhar" + output = [["Character", "&duhar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dwangle without a semi-colon" do + input = "&dwangle" + output = [["Character", "&dwangle"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dzcy without a semi-colon" do + input = "&dzcy" + output = [["Character", "&dzcy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: dzigrarr without a semi-colon" do + input = "&dzigrarr" + output = [["Character", "&dzigrarr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eDDot without a semi-colon" do + input = "&eDDot" + output = [["Character", "&eDDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eDot without a semi-colon" do + input = "&eDot" + output = [["Character", "&eDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: easter without a semi-colon" do + input = "&easter" + output = [["Character", "&easter"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ecaron without a semi-colon" do + input = "&ecaron" + output = [["Character", "&ecaron"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ecir without a semi-colon" do + input = "&ecir" + output = [["Character", "&ecir"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ecolon without a semi-colon" do + input = "&ecolon" + output = [["Character", "&ecolon"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ecy without a semi-colon" do + input = "&ecy" + output = [["Character", "&ecy"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: edot without a semi-colon" do + input = "&edot" + output = [["Character", "&edot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ee without a semi-colon" do + input = "&ee" + output = [["Character", "&ee"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: efDot without a semi-colon" do + input = "&efDot" + output = [["Character", "&efDot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: efr without a semi-colon" do + input = "&efr" + output = [["Character", "&efr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: eg without a semi-colon" do + input = "&eg" + output = [["Character", "&eg"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: egs without a semi-colon" do + input = "&egs" + output = [["Character", "&egs"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: egsdot without a semi-colon" do + input = "&egsdot" + output = [["Character", "&egsdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: el without a semi-colon" do + input = "&el" + output = [["Character", "&el"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: elinters without a semi-colon" do + input = "&elinters" + output = [["Character", "&elinters"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: ell without a semi-colon" do + input = "&ell" + output = [["Character", "&ell"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: els without a semi-colon" do + input = "&els" + output = [["Character", "&els"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: elsdot without a semi-colon" do + input = "&elsdot" + output = [["Character", 
"&elsdot"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: emacr without a semi-colon" do + input = "&emacr" + output = [["Character", "&emacr"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: empty without a semi-colon" do + input = "&empty" + output = [["Character", "&empty"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: emptyset without a semi-colon" do + input = "&emptyset" + output = [["Character", "&emptyset"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: emptyv without a semi-colon" do + input = "&emptyv" + output = [["Character", "&emptyv"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: emsp without a semi-colon" do + input = "&emsp" + output = [["Character", "&emsp"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Bad named entity: emsp13 without a semi-colon" do + input = "&emsp13" + output = [["Character", "&emsp13"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/numericEntities_part1_test.exs b/test/floki/html/generated/tokenizer/numericEntities_part1_test.exs new file mode 100644 index 00000000..7590e0d2 --- /dev/null +++ b/test/floki/html/generated/tokenizer/numericEntities_part1_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NumericentitiesPart1Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests numericEntities.test". 
+ # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Invalid numeric entity character U+0000" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0001" do + input = "" + output = [["Character", <<1>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0002" do + input = "" + output = [["Character", <<2>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0003" do + input = "" + output = [["Character", <<3>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0004" do + input = "" + output = [["Character", <<4>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0005" do + input = "" + output = [["Character", <<5>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0006" do + input = "" + output = [["Character", <<6>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0007" do + input = "" + output = [["Character", "\a"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0008" do + input = "" + output = [["Character", "\b"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+000B" do + input = " " + output = [["Character", "\v"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+000E" do + input = "" + output = [["Character", <<14>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+000F" do + input = "" + output = [["Character", <<15>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0010" do + input = "" + output = [["Character", <<16>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0011" do + input = "" + output = [["Character", <<17>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end 
+ + test "tokenize/1 Invalid numeric entity character U+0012" do + input = "" + output = [["Character", <<18>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0013" do + input = "" + output = [["Character", <<19>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0014" do + input = "" + output = [["Character", <<20>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0015" do + input = "" + output = [["Character", <<21>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0016" do + input = "" + output = [["Character", <<22>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0017" do + input = "" + output = [["Character", <<23>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0018" do + input = "" + output = [["Character", <<24>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+0019" do + input = "" + output = [["Character", <<25>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+001A" do + input = "" + output = [["Character", <<26>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+001B" do + input = "" + output = [["Character", "\e"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+001C" do + input = "" + output = [["Character", <<28>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+001D" do + input = "" + output = [["Character", <<29>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+001E" do + input = "" + output = [["Character", <<30>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+001F" do + input = "" + output = [["Character", <<31>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+007F" do + input = "" + 
output = [["Character", "\d"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+10FFFE" do + input = "􏿾" + output = [["Character", "􏿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+10FFFF" do + input = "􏿿" + output = [["Character", "􏿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+1FFFE" do + input = "🿾" + output = [["Character", "🿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+1FFFF" do + input = "🿿" + output = [["Character", "🿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+2FFFE" do + input = "𯿾" + output = [["Character", "𯿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+2FFFF" do + input = "𯿿" + output = [["Character", "𯿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+3FFFE" do + input = "𿿾" + output = [["Character", "𿿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+3FFFF" do + input = "𿿿" + output = [["Character", "𿿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+4FFFE" do + input = "񏿾" + output = [["Character", "񏿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+4FFFF" do + input = "񏿿" + output = [["Character", "񏿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+5FFFE" do + input = "񟿾" + output = [["Character", "񟿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+5FFFF" do + input = "񟿿" + output = [["Character", "񟿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+6FFFE" do + input = "񯿾" + output = [["Character", "񯿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+6FFFF" do + input = "񯿿" + output = [["Character", "񯿿"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+7FFFE" do + input = "񿿾" + output = [["Character", "񿿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+7FFFF" do + input = "񿿿" + output = [["Character", "񿿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+8FFFE" do + input = "򏿾" + output = [["Character", "򏿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+8FFFF" do + input = "򏿿" + output = [["Character", "򏿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+9FFFE" do + input = "򟿾" + output = [["Character", "򟿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+9FFFF" do + input = "򟿿" + output = [["Character", "򟿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+AFFFE" do + input = "򯿾" + output = [["Character", "򯿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+AFFFF" do + input = "򯿿" + output = [["Character", "򯿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+BFFFE" do + input = "򿿾" + output = [["Character", "򿿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+BFFFF" do + input = "򿿿" + output = [["Character", "򿿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+CFFFE" do + input = "󏿾" + output = [["Character", "󏿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+CFFFF" do + input = "󏿿" + output = [["Character", "󏿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+D800" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+DFFF" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Invalid numeric entity character U+DFFFE" do + input = "󟿾" + output = [["Character", "󟿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+DFFFF" do + input = "󟿿" + output = [["Character", "󟿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+EFFFE" do + input = "󯿾" + output = [["Character", "󯿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+EFFFF" do + input = "󯿿" + output = [["Character", "󯿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD0" do + input = "﷐" + output = [["Character", "﷐"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD1" do + input = "﷑" + output = [["Character", "﷑"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD2" do + input = "﷒" + output = [["Character", "﷒"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD3" do + input = "﷓" + output = [["Character", "﷓"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD4" do + input = "﷔" + output = [["Character", "﷔"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD5" do + input = "﷕" + output = [["Character", "﷕"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD6" do + input = "﷖" + output = [["Character", "﷖"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD7" do + input = "﷗" + output = [["Character", "﷗"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD8" do + input = "﷘" + output = [["Character", "﷘"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDD9" do + input = "﷙" + output = [["Character", "﷙"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDDA" do + input = "﷚" + output = [["Character", "﷚"]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDDB" do + input = "﷛" + output = [["Character", "﷛"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDDC" do + input = "﷜" + output = [["Character", "﷜"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDDD" do + input = "﷝" + output = [["Character", "﷝"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDDE" do + input = "﷞" + output = [["Character", "﷞"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDDF" do + input = "﷟" + output = [["Character", "﷟"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE0" do + input = "﷠" + output = [["Character", "﷠"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE1" do + input = "﷡" + output = [["Character", "﷡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE2" do + input = "﷢" + output = [["Character", "﷢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE3" do + input = "﷣" + output = [["Character", "﷣"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE4" do + input = "﷤" + output = [["Character", "﷤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE5" do + input = "﷥" + output = [["Character", "﷥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE6" do + input = "﷦" + output = [["Character", "﷦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE7" do + input = "﷧" + output = [["Character", "﷧"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE8" do + input = "﷨" + output = [["Character", "﷨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDE9" do + input = "﷩" + output = [["Character", "﷩"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDEA" do + input = "﷪" + output = [["Character", "﷪"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDEB" do + input = "﷫" + output = [["Character", "﷫"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDEC" do + input = "﷬" + output = [["Character", "﷬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDED" do + input = "﷭" + output = [["Character", "﷭"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDEE" do + input = "﷮" + output = [["Character", "﷮"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FDEF" do + input = "﷯" + output = [["Character", "﷯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FFFE" do + input = "￾" + output = [["Character", <<239, 191, 190>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FFFF" do + input = "￿" + output = [["Character", <<239, 191, 191>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FFFFE" do + input = "󿿾" + output = [["Character", "󿿾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character U+FFFFF" do + input = "󿿿" + output = [["Character", "󿿿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid numeric entity character overflow" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid unterminated numeric entity character overflow" do + input = "�x" + output = [["Character", "�x"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Invalid unterminated numeric entity character overflow before EOF" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end 
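
All of the generated tests in these files pipe the tokenizer output through `TokenizerTestLoader.tokenization_result/1` and compare `result.tokens` against the html5lib-tests expected output (lists such as `["Character", "A"]`). That helper is part of the test suite but not of this diff; the sketch below only illustrates the kind of conversion involved. The token shapes it assumes (`{:char, binary}` tuples accumulated in reverse in a `:tokens` list) are hypothetical and not necessarily Floki's actual internal representation.

```elixir
# Hypothetical sketch only: the real TokenizerTestLoader ships with Floki's test
# suite and is not part of this diff. The {:char, binary} token shape below is an
# assumption made for illustration, not Floki's actual internal token format.
defmodule TokenizerTestLoaderSketch do
  defstruct tokens: []

  # Converts a tokenizer result into the html5lib-tests output format,
  # e.g. [["Character", "A"]], so it can be compared with the expected output.
  def tokenization_result(%{tokens: tokens}) do
    converted =
      tokens
      # assuming tokens are accumulated in reverse order by the tokenizer
      |> Enum.reverse()
      |> Enum.map(&to_html5lib/1)
      |> merge_characters()

    %__MODULE__{tokens: converted}
  end

  defp to_html5lib({:char, data}), do: ["Character", data]
  defp to_html5lib({:comment, data}), do: ["Comment", data]
  defp to_html5lib({:start_tag, name, attrs}), do: ["StartTag", name, Map.new(attrs)]
  defp to_html5lib({:end_tag, name}), do: ["EndTag", name]

  # html5lib-tests expects consecutive character tokens to be collapsed
  # into a single ["Character", ...] entry.
  defp merge_characters([["Character", a], ["Character", b] | rest]) do
    merge_characters([["Character", a <> b] | rest])
  end

  defp merge_characters([token | rest]), do: [token | merge_characters(rest)]
  defp merge_characters([]), do: []
end
```

With a helper along these lines, `tokenization_result(%{tokens: [{:char, "A"}]})` would return a struct with `tokens: [["Character", "A"]]`, matching the expected-output format used throughout these generated tests.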
diff --git a/test/floki/html/generated/tokenizer/numericEntities_part2_test.exs b/test/floki/html/generated/tokenizer/numericEntities_part2_test.exs
new file mode 100644
index 00000000..cbf5b039
--- /dev/null
+++ b/test/floki/html/generated/tokenizer/numericEntities_part2_test.exs
@@ -0,0 +1,1208 @@
+defmodule Floki.HTML.Generated.Tokenizer.NumericentitiesPart2Test do
+  use ExUnit.Case, async: true
+
+  # NOTE: This file was generated by "mix generate_tokenizer_tests numericEntities.test".
+  # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5
+
+  alias Floki.HTML.Tokenizer
+
+  test "tokenize/1 Valid numeric entity character U+0009" do
+    input = "&#x0009;"
+    output = [["Character", "\t"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+000A" do
+    input = "&#x000A;"
+    output = [["Character", "\n"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0020" do
+    input = "&#x0020;"
+    output = [["Character", " "]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0021" do
+    input = "&#x0021;"
+    output = [["Character", "!"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0022" do
+    input = "&#x0022;"
+    output = [["Character", "\""]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0023" do
+    input = "&#x0023;"
+    output = [["Character", "#"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0024" do
+    input = "&#x0024;"
+    output = [["Character", "$"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0025" do
+    input = "&#x0025;"
+    output = [["Character", "%"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0026" do
+    input = "&#x0026;"
+    output = [["Character", "&"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0027" do
+    input = "&#x0027;"
+    output = [["Character", "'"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0028" do
+    input = "&#x0028;"
+    output = [["Character", "("]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0029" do
+    input = "&#x0029;"
+    output = [["Character", ")"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid 
numeric entity character U+002A" do + input = "*" + output = [["Character", "*"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+002B" do + input = "+" + output = [["Character", "+"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+002C" do + input = "," + output = [["Character", ","]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+002D" do + input = "-" + output = [["Character", "-"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+002E" do + input = "." + output = [["Character", "."]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+002F" do + input = "/" + output = [["Character", "/"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0030" do + input = "0" + output = [["Character", "0"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0031" do + input = "1" + output = [["Character", "1"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0032" do + input = "2" + output = [["Character", "2"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0033" do + input = "3" + output = [["Character", "3"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0034" do + input = "4" + output = [["Character", "4"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0035" do + input = "5" + output = [["Character", "5"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0036" do + input = "6" + output = [["Character", "6"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0037" do + input = "7" + output = [["Character", "7"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0038" do + input = "8" + output = [["Character", "8"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0039" do + input = "9" + output = [["Character", "9"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+003A" do + input = ":" + output = [["Character", ":"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+003B" do + input = ";" + output = [["Character", ";"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+003C" do + input = "<" + output = [["Character", "<"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+003D" do + input = "=" + output = [["Character", "="]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+003E" do + input = ">" + output = [["Character", ">"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+003F" do + input = "?" + output = [["Character", "?"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0040" do + input = "@" + output = [["Character", "@"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0041" do + input = "A" + output = [["Character", "A"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0042" do + input = "B" + output = [["Character", "B"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0043" do + input = "C" + output = [["Character", "C"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0044" do + input = "D" + output = [["Character", "D"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0045" do + input = "E" + output = [["Character", "E"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0046" do + input = "F" + output = [["Character", "F"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character 
U+0047" do + input = "G" + output = [["Character", "G"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0048" do + input = "H" + output = [["Character", "H"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0049" do + input = "I" + output = [["Character", "I"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+004A" do + input = "J" + output = [["Character", "J"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+004B" do + input = "K" + output = [["Character", "K"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+004C" do + input = "L" + output = [["Character", "L"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+004D" do + input = "M" + output = [["Character", "M"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+004E" do + input = "N" + output = [["Character", "N"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+004F" do + input = "O" + output = [["Character", "O"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0050" do + input = "P" + output = [["Character", "P"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0051" do + input = "Q" + output = [["Character", "Q"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0052" do + input = "R" + output = [["Character", "R"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0053" do + input = "S" + output = [["Character", "S"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0054" do + input = "T" + output = [["Character", "T"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0055" do + input = "U" + output = [["Character", "U"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0056" do
+    input = "&#x0056;"
+    output = [["Character", "V"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0057" do
+    input = "&#x0057;"
+    output = [["Character", "W"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0058" do
+    input = "&#x0058;"
+    output = [["Character", "X"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0059" do
+    input = "&#x0059;"
+    output = [["Character", "Y"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+005A" do
+    input = "&#x005A;"
+    output = [["Character", "Z"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+005B" do
+    input = "&#x005B;"
+    output = [["Character", "["]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+005C" do
+    input = "&#x005C;"
+    output = [["Character", "\\"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+005D" do
+    input = "&#x005D;"
+    output = [["Character", "]"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+005E" do
+    input = "&#x005E;"
+    output = [["Character", "^"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+005F" do
+    input = "&#x005F;"
+    output = [["Character", "_"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0060" do
+    input = "&#x0060;"
+    output = [["Character", "`"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0061" do
+    input = "&#x0061;"
+    output = [["Character", "a"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0062" do
+    input = "&#x0062;"
+    output = [["Character", "b"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character U+0063" do
+    input = "&#x0063;"
+    output = [["Character", "c"]]
+
+    result =
+      input
+      |> Tokenizer.tokenize()
+      |> TokenizerTestLoader.tokenization_result()
+
+    assert result.tokens == output
+  end
+
+  test "tokenize/1 Valid numeric entity character 
U+0064" do + input = "d" + output = [["Character", "d"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0065" do + input = "e" + output = [["Character", "e"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0066" do + input = "f" + output = [["Character", "f"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0067" do + input = "g" + output = [["Character", "g"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0068" do + input = "h" + output = [["Character", "h"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0069" do + input = "i" + output = [["Character", "i"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+006A" do + input = "j" + output = [["Character", "j"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+006B" do + input = "k" + output = [["Character", "k"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+006C" do + input = "l" + output = [["Character", "l"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+006D" do + input = "m" + output = [["Character", "m"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+006E" do + input = "n" + output = [["Character", "n"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+006F" do + input = "o" + output = [["Character", "o"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0070" do + input = "p" + output = [["Character", "p"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0071" do + input = "q" + output = [["Character", "q"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0072" do + input = "r" + output = [["Character", "r"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0073" do + input = "s" + output = [["Character", "s"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0074" do + input = "t" + output = [["Character", "t"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0075" do + input = "u" + output = [["Character", "u"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0076" do + input = "v" + output = [["Character", "v"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0077" do + input = "w" + output = [["Character", "w"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0078" do + input = "x" + output = [["Character", "x"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+0079" do + input = "y" + output = [["Character", "y"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+007A" do + input = "z" + output = [["Character", "z"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+007B" do + input = "{" + output = [["Character", "{"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+007C" do + input = "|" + output = [["Character", "|"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+007D" do + input = "}" + output = [["Character", "}"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+007E" do + input = "~" + output = [["Character", "~"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A0" do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A1" do + input = "¡" + output = [["Character", "¡"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character 
U+00A2" do + input = "¢" + output = [["Character", "¢"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/numericEntities_part3_test.exs b/test/floki/html/generated/tokenizer/numericEntities_part3_test.exs new file mode 100644 index 00000000..2a89fb20 --- /dev/null +++ b/test/floki/html/generated/tokenizer/numericEntities_part3_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.NumericentitiesPart3Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests numericEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Valid numeric entity character U+00A3" do + input = "£" + output = [["Character", "£"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A4" do + input = "¤" + output = [["Character", "¤"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A5" do + input = "¥" + output = [["Character", "¥"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A6" do + input = "¦" + output = [["Character", "¦"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A7" do + input = "§" + output = [["Character", "§"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A8" do + input = "¨" + output = [["Character", "¨"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00A9" do + input = "©" + output = [["Character", "©"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00AA" do + input = "ª" + output = [["Character", "ª"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00AB" do + input = "«" + output = [["Character", "«"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00AC" do + input = "¬" + output = [["Character", "¬"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00AD" do + input = "­" + output = [["Character", "­"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00AE" do + input = "®" + 
output = [["Character", "®"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00AF" do + input = "¯" + output = [["Character", "¯"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B0" do + input = "°" + output = [["Character", "°"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B1" do + input = "±" + output = [["Character", "±"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B2" do + input = "²" + output = [["Character", "²"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B3" do + input = "³" + output = [["Character", "³"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B4" do + input = "´" + output = [["Character", "´"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B5" do + input = "µ" + output = [["Character", "µ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B6" do + input = "¶" + output = [["Character", "¶"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B7" do + input = "·" + output = [["Character", "·"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B8" do + input = "¸" + output = [["Character", "¸"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00B9" do + input = "¹" + output = [["Character", "¹"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00BA" do + input = "º" + output = [["Character", "º"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00BB" do + input = "»" + output = [["Character", "»"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00BC" do + input = "¼" + output = [["Character", "¼"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert 
result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00BD" do + input = "½" + output = [["Character", "½"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00BE" do + input = "¾" + output = [["Character", "¾"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00BF" do + input = "¿" + output = [["Character", "¿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C0" do + input = "À" + output = [["Character", "À"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C1" do + input = "Á" + output = [["Character", "Á"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C2" do + input = "Â" + output = [["Character", "Â"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C3" do + input = "Ã" + output = [["Character", "Ã"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C4" do + input = "Ä" + output = [["Character", "Ä"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C5" do + input = "Å" + output = [["Character", "Å"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C6" do + input = "Æ" + output = [["Character", "Æ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C7" do + input = "Ç" + output = [["Character", "Ç"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C8" do + input = "È" + output = [["Character", "È"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00C9" do + input = "É" + output = [["Character", "É"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00CA" do + input = "Ê" + output = [["Character", "Ê"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00CB" do + input = "Ë" + output = [["Character", "Ë"]] 
+ + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00CC" do + input = "Ì" + output = [["Character", "Ì"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00CD" do + input = "Í" + output = [["Character", "Í"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00CE" do + input = "Î" + output = [["Character", "Î"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00CF" do + input = "Ï" + output = [["Character", "Ï"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D0" do + input = "Ð" + output = [["Character", "Ð"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D1" do + input = "Ñ" + output = [["Character", "Ñ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D2" do + input = "Ò" + output = [["Character", "Ò"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D3" do + input = "Ó" + output = [["Character", "Ó"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D4" do + input = "Ô" + output = [["Character", "Ô"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D5" do + input = "Õ" + output = [["Character", "Õ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D6" do + input = "Ö" + output = [["Character", "Ö"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D7" do + input = "×" + output = [["Character", "×"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D8" do + input = "Ø" + output = [["Character", "Ø"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00D9" do + input = "Ù" + output = [["Character", "Ù"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + 
test "tokenize/1 Valid numeric entity character U+00DA" do + input = "Ú" + output = [["Character", "Ú"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00DB" do + input = "Û" + output = [["Character", "Û"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00DC" do + input = "Ü" + output = [["Character", "Ü"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00DD" do + input = "Ý" + output = [["Character", "Ý"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00DE" do + input = "Þ" + output = [["Character", "Þ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00DF" do + input = "ß" + output = [["Character", "ß"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E0" do + input = "à" + output = [["Character", "à"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E1" do + input = "á" + output = [["Character", "á"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E2" do + input = "â" + output = [["Character", "â"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E3" do + input = "ã" + output = [["Character", "ã"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E4" do + input = "ä" + output = [["Character", "ä"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E5" do + input = "å" + output = [["Character", "å"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E6" do + input = "æ" + output = [["Character", "æ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E7" do + input = "ç" + output = [["Character", "ç"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E8" do + input = "è" + output = [["Character", "è"]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00E9" do + input = "é" + output = [["Character", "é"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00EA" do + input = "ê" + output = [["Character", "ê"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00EB" do + input = "ë" + output = [["Character", "ë"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00EC" do + input = "ì" + output = [["Character", "ì"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00ED" do + input = "í" + output = [["Character", "í"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00EE" do + input = "î" + output = [["Character", "î"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00EF" do + input = "ï" + output = [["Character", "ï"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F0" do + input = "ð" + output = [["Character", "ð"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F1" do + input = "ñ" + output = [["Character", "ñ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F2" do + input = "ò" + output = [["Character", "ò"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F3" do + input = "ó" + output = [["Character", "ó"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F4" do + input = "ô" + output = [["Character", "ô"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F5" do + input = "õ" + output = [["Character", "õ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F6" do + input = "ö" + output = [["Character", "ö"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid 
numeric entity character U+00F7" do + input = "÷" + output = [["Character", "÷"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F8" do + input = "ø" + output = [["Character", "ø"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00F9" do + input = "ù" + output = [["Character", "ù"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00FA" do + input = "ú" + output = [["Character", "ú"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00FB" do + input = "û" + output = [["Character", "û"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00FC" do + input = "ü" + output = [["Character", "ü"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00FD" do + input = "ý" + output = [["Character", "ý"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00FE" do + input = "þ" + output = [["Character", "þ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+00FF" do + input = "ÿ" + output = [["Character", "ÿ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+10000" do + input = "𐀀" + output = [["Character", "𐀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+100000" do + input = "􀀀" + output = [["Character", "􀀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+10FFFD" do + input = "􏿽" + output = [["Character", "􏿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+1FFFD" do + input = "🿽" + output = [["Character", "🿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+20000" do + input = "𠀀" + output = [["Character", "𠀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+2FFFD" do + input = "𯿽" + output = [["Character", "𯿽"]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+30000" do + input = "𰀀" + output = [["Character", "𰀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/numericEntities_part4_test.exs b/test/floki/html/generated/tokenizer/numericEntities_part4_test.exs new file mode 100644 index 00000000..c7d5db65 --- /dev/null +++ b/test/floki/html/generated/tokenizer/numericEntities_part4_test.exs @@ -0,0 +1,368 @@ +defmodule Floki.HTML.Generated.Tokenizer.NumericentitiesPart4Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests numericEntities.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Valid numeric entity character U+3FFFD" do + input = "𿿽" + output = [["Character", "𿿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+40000" do + input = "񀀀" + output = [["Character", "񀀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+4FFFD" do + input = "񏿽" + output = [["Character", "񏿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+50000" do + input = "񐀀" + output = [["Character", "񐀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+5FFFD" do + input = "񟿽" + output = [["Character", "񟿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+60000" do + input = "񠀀" + output = [["Character", "񠀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+6FFFD" do + input = "񯿽" + output = [["Character", "񯿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+70000" do + input = "񰀀" + output = [["Character", "񰀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+7FFFD" do + input = "񿿽" + output = [["Character", "񿿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+80000" do + input = "򀀀" + output = [["Character", "򀀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+8FFFD" do + input = "򏿽" + output = [["Character", "򏿽"]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+90000" do + input = "򐀀" + output = [["Character", "򐀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+9FFFD" do + input = "򟿽" + output = [["Character", "򟿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+A0000" do + input = "򠀀" + output = [["Character", "򠀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+AFFFD" do + input = "򯿽" + output = [["Character", "򯿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+B0000" do + input = "򰀀" + output = [["Character", "򰀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+BFFFD" do + input = "򿿽" + output = [["Character", "򿿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+C0000" do + input = "󀀀" + output = [["Character", "󀀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+CFFFD" do + input = "󏿽" + output = [["Character", "󏿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+D0000" do + input = "󐀀" + output = [["Character", "󐀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+D7FF" do + input = "퟿" + output = [["Character", "퟿"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+DFFFD" do + input = "󟿽" + output = [["Character", "󟿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+E000" do + input = "" + output = [["Character", ""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+E0000" do + input = "󠀀" + output = [["Character", "󠀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+EFFFD" do + input = "󯿽" + output = [["Character", "󯿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity 
character U+F0000" do + input = "󰀀" + output = [["Character", "󰀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+FDCF" do + input = "﷏" + output = [["Character", "﷏"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+FDF0" do + input = "ﷰ" + output = [["Character", "ﷰ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+FFFD" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Valid numeric entity character U+FFFFD" do + input = "󿿽" + output = [["Character", "󿿽"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test1_test.exs b/test/floki/html/generated/tokenizer/test1_test.exs new file mode 100644 index 00000000..c08e6e8b --- /dev/null +++ b/test/floki/html/generated/tokenizer/test1_test.exs @@ -0,0 +1,602 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test1Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test1.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 ASCII decimal entity" do + input = "$" + output = [["Character", "$"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 ASCII hexadecimal entity" do + input = "?" 
+ output = [["Character", "?"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Ampersand EOF" do + input = "&" + output = [["Character", "&"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Ampersand ampersand EOF" do + input = "&&" + output = [["Character", "&&"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Ampersand space EOF" do + input = "& " + output = [["Character", "& "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Ampersand, number sign" do + input = "&#" + output = [["Character", "&#"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Comment, Central dash no space" do + input = "" + output = [["Comment", "-"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Comment, two central dashes" do + input = "" + output = [["Comment", " --comment "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Correct Doctype case with EOF" do + input = " Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Correct Doctype lowercase" do + input = "" + output = [["DOCTYPE", "html", nil, nil, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Correct Doctype mixed case" do + input = "" + output = [["DOCTYPE", "html", nil, nil, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Correct Doctype uppercase" do + input = "" + output = [["DOCTYPE", "html", nil, nil, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Doctype in error" do + input = "" + output = [["DOCTYPE", "foo", nil, nil, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Empty end tag" do + input = "" + output = [] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Empty start tag" do + input = "<>" + output = [["Character", "<>"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 End Tag w/attribute" do + input = "" + output = [["StartTag", "h", %{}], ["EndTag", "h"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity in attribute without semicolon" do + input = "" + output = [["StartTag", "h", %{"a" => "©"}]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity in attribute without semicolon ending in 1" do + input = "" + output = [["StartTag", "h", %{"a" => "¬1"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity in attribute without semicolon ending in i" do + input = "" + output = [["StartTag", "h", %{"a" => "¬i"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity in attribute without semicolon ending in x" do + input = "" + output = [["StartTag", "h", %{"a" => "¬x"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity with trailing semicolon (1)" do + input = "I'm ¬it" + output = [["Character", "I'm ¬it"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity with trailing semicolon (2)" do + input = "I'm ∉" + output = [["Character", "I'm ∉"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity without trailing semicolon (1)" do + input = "I'm ¬it" + output = [["Character", "I'm ¬it"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity without trailing semicolon (2)" do + input = "I'm ¬in" + output = [["Character", "I'm ¬in"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Hexadecimal entity in attribute" do + input = "" + output = [["StartTag", "h", %{"a" => "?"}], ["EndTag", "h"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Multiple atts" do + input = "" + output = [["StartTag", "h", %{"a" => "b", "c" => "d"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Multiple atts no space" do + input = "" + output = [["StartTag", "h", %{"a" => "b", "c" => "d"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Nested comment" do + input = "" + output = [["Comment", " " + output = [["Comment", ""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Short comment three" do + input = "" + output = [["Comment", ""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Short comment two" do + input = "" + output = [["Comment", ""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Simple comment" do + input = "" + output = [["Comment", "comment"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + 
assert result.tokens == output + end + + test "tokenize/1 Single Start Tag" do + input = "" + output = [["StartTag", "h", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Start Tag w/attribute" do + input = "" + output = [["StartTag", "h", %{"a" => "b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Start Tag w/attribute no quotes" do + input = "" + output = [["StartTag", "h", %{"a" => "b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Start of a comment" do + input = " Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Start/End Tag" do + input = "" + output = [["StartTag", "h", %{}], ["EndTag", "h"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Truncated doctype start" do + input = "" + output = [["Comment", "DOC"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Two unclosed start tags" do + input = "

<p>One<p>
Two" + + output = [ + ["StartTag", "p", %{}], + ["Character", "One"], + ["StartTag", "p", %{}], + ["Character", "Two"] + ] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Unfinished comment" do + input = "" + output = [["Comment", "?foo--"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Comment with dash" do + input = "c" + output = [["Character", "a"], ["Comment", "b"], ["Character", "c"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Empty end tag with following end tag" do + input = "ac" + output = [["Character", "a"], ["EndTag", "b"], ["Character", "c"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Empty end tag with following tag" do + input = "ac" + output = [["Character", "a"], ["StartTag", "b", %{}], ["Character", "c"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity + newline" do + input = "\nx\n>\n" + output = [["Character", "\nx\n>\n"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Entity without a name" do + input = "&;" + output = [["Character", "&;"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Hexadecimal entity pair representing a surrogate pair" do + input = "��" + output = [["Character", "��"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Hexadecimal entity representing the NUL character" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Hexadecimal entity with mixed uppercase and lowercase" do + input = "ꯍ" + output = [["Character", "ꯍ"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Illegal end tag name" do + input = "" + output = [["Comment", "1"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Incomplete doctype" do + input = " Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Incorrect DOCTYPE without a space before name" do + input = "" + output = [["DOCTYPE", "foo", nil, nil, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Non-void element containing trailing /" do + 
input = "" + output = [["StartTag", "h", %{}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Null Byte Replacement" do + input = <<0>> + output = [["Character", <<0>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Numeric entity representing a codepoint after 1114111 (U+10FFFF)" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Numeric entity representing the NUL character" do + input = "�" + output = [["Character", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Simili processing instruction" do + input = "" + output = [["Comment", "?namespace"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Single-quote after attribute name" do + input = "" + output = [["StartTag", "h", %{"'" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Start tag with no attributes but space before the greater-than sign" do + input = "" + output = [["StartTag", "h", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 StartTag containing /" do + input = "" + output = [["StartTag", "h", %{"a" => "b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 StartTag containing <" do + input = "" + output = [["StartTag", "a Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Unescaped <" do + input = "foo < bar" + output = [["Character", "foo < bar"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Unescaped Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Unescaped ampersand in attribute value" do + input = "" + output = [["StartTag", "h", %{"a" => "&"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Void element with permitted slash" do + input = "
" + output = [["StartTag", "br", %{}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Void element with permitted slash (with attribute)" do + input = "
" + output = [["StartTag", "br", %{"foo" => "bar"}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part10_test.exs b/test/floki/html/generated/tokenizer/test3_part10_test.exs new file mode 100644 index 00000000..3fa76d0d --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part10_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part10Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["DOCTYPE", "a", nil, "", false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", "a", nil, "�", false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["DOCTYPE", "a", nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", "a", nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", "a", 
nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", "a", nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", "a", nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + 
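+ # DOCTYPE tokens follow the html5lib-tests shape ["DOCTYPE", name, public_id,
+ # system_id, correctness]: nil marks a missing identifier, and correctness is
+ # false whenever the tokenizer would set the force-quirks flag for the input.
+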
+ test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["DOCTYPE", "a", nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", "a", nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part11_test.exs b/test/floki/html/generated/tokenizer/test3_part11_test.exs new file mode 100644 index 00000000..7ec4b8b4 --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part11_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part11Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["DOCTYPE", "a", nil, nil, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", "a�", nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["DOCTYPE", <<97, 31>>, nil, nil, false]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["Comment", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + 
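+ # Per the HTML tokenization spec, NUL (U+0000) characters consumed in comment
+ # and DOCTYPE states are emitted as U+FFFD, which is why "�" appears in several
+ # of the expected Comment and DOCTYPE outputs in this module.
+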
test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <\"" do + input = "<\"" + output = [["Character", "<\""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <&" do + input = "<&" + output = [["Character", "<&"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <'" do + input = "<'" + output = [["Character", "<'"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <-" do + input = "<-" + output = [["Character", "<-"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <." do + input = "<." 
+ output = [["Character", "<."]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["Comment", " �"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["EndTag", "a"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1
" do + input = "" + output = [["EndTag", "b"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["EndTag", "y"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["EndTag", "z"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["Comment", "�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["EndTag", "a"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["EndTag", "b"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["EndTag", "y"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["EndTag", "z"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <0" do + input = "<0" + output = [["Character", "<0"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <1" do + input = "<1" + output = [["Character", "<1"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <9" do + input = "<9" + output = [["Character", "<9"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <<" do + input = "<<" + output = [["Character", "<<"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <=" do + input = "<=" + output = [["Character", "<="]] + + 
result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <>" do + input = "<>" + output = [["Character", "<>"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part12_test.exs b/test/floki/html/generated/tokenizer/test3_part12_test.exs new file mode 100644 index 00000000..2ceaeb81 --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part12_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part12Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["Comment", "? �"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["Comment", "?"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["Comment", "?�"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <@" do + input = "<@" + output = [["Character", "<@"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "b", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "y", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "z", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <[" do + input = "<[" + output = [["Character", "<["]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <\\u0000" do + input = <<60, 0>> + output = [["Character", <<60, 0>>]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <\\u0009" do + input = "<\t" + output = [["Character", "<\t"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test 
"tokenize/1 <\\u000A" do + input = "<\n" + output = [["Character", "<\n"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <\\u000B" do + input = "<\v" + output = [["Character", "<\v"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <\\u000C" do + input = "<\f" + output = [["Character", "<\f"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <\\uDBC0\\uDC00" do + input = "<􀀀" + output = [["Character", "<􀀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <`" do + input = "<`" + output = [["Character", "<`"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"!" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\"" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"#" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"&" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"'" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"(" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"-" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"." 
=> ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"0" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"1" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"9" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"<" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"=" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"?" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"@" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"[" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 0, 62>> + output = [["StartTag", "a", %{"�" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\b" => ""}]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\v" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 31, 62>> + output = [["StartTag", "a", %{<<31>> => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"􀀀" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"`" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"!" 
=> "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\"" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"#" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"&" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"'" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"(" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"-" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"." => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"0" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"1" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"9" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"<" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"?" 
=> "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"@" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part13_test.exs b/test/floki/html/generated/tokenizer/test3_part13_test.exs new file mode 100644 index 00000000..db30e780 --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part13_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part13Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"[" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 32, 0, 62>> + output = [["StartTag", "a", %{"a" => "", "�" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\b" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\v" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input 
+ |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 32, 31, 62>> + output = [["StartTag", "a", %{<<31>> => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "􀀀" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"`" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "{" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a!" 
=> ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a\"" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a#" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a&" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a'" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a(" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a-" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a." => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a0" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a1" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a9" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a<" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "!"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => " "}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "!"}]] + + result = + input 
+ |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "#"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "%"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "&"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "'"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "-"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "/"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "0"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "1"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "9"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "<"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "="}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 \">" do + input = "\">" + output = [["StartTag", "a", %{"a" => ">"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "?"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "@"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "A"}]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "B"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "Y"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "Z"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 34, 0, 34, 62>> + output = [["StartTag", "a", %{"a" => "�"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\t"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\n"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\v"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\f"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "􀀀"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "`"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "y"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "z"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "{"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "#"}]] + + result = + input + |> 
Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "%"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "&"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => " "}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "!"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "%"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "&"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"!" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\"" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"&" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"'" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"-" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"." 
=> "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"0" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"1" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"9" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"<" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"=" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"?" 
=> "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"@" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 39, 39, 0, 62>> + output = [["StartTag", "a", %{"a" => "", "�" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\b" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\v" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 39, 39, 31, 62>> + output = [["StartTag", "a", %{<<31>> => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part14_test.exs b/test/floki/html/generated/tokenizer/test3_part14_test.exs new file mode 100644 index 00000000..728a50de --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part14_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part14Test do + use ExUnit.Case, async: true + + # NOTE: This 
file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "􀀀" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"`" => "", "a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "", "{" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "("}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "-"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "/"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "0"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "1"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "9"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "<"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "="}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input 
= "" + output = [["StartTag", "a", %{"a" => ">"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "?"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "@"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "A"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "B"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "Y"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "Z"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 39, 0, 39, 62>> + output = [["StartTag", "a", %{"a" => "�"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\t"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\n"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\v"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\f"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "􀀀"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "`"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + 
input = "" + output = [["StartTag", "a", %{"a" => "y"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "z"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "{"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "("}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "-"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "/"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "0"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "1"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "9"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "<"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "="}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "?"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "@"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "A"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "B"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", 
"a", %{"a" => "Y"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "Z"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 0, 62>> + output = [["StartTag", "a", %{"a" => "�"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "\v"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 31, 62>> + output = [["StartTag", "a", %{"a" => <<31>>}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "􀀀"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "`"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a!"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a\""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a#"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" 
+ output = [["StartTag", "a", %{"a" => "a%"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a&"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a'"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a("}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a-"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a/"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a0"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a1"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a9"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a<"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a="}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a?"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a@"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "aA"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "aB"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = 
[["StartTag", "a", %{"a" => "aY"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "aZ"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 97, 0, 62>> + output = [["StartTag", "a", %{"a" => "a�"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a\b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a\v"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 61, 97, 31, 62>> + output = [["StartTag", "a", %{"a" => <<97, 31>>}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a􀀀"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a`"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "aa"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "ab"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "ay"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "az"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + 
+ test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "a{"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "b"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "y"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "z"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => "{"}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part15_test.exs b/test/floki/html/generated/tokenizer/test3_part15_test.exs new file mode 100644 index 00000000..85884c1b --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part15_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part15Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a?" 
=> ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a@" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"aa" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"ab" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"ay" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"az" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a[" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 0, 62>> + output = [["StartTag", "a", %{"a�" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a\b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a\v" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 32, 97, 31, 62>> + output = [["StartTag", "a", %{<<97, 31>> => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a􀀀" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = 
[["StartTag", "a", %{"a`" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"aa" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"ab" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"ay" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"az" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a{" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"{" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a!", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a\"", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a&", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a'", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a-", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a.", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> 
TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"!" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\"" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"&" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"'" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"-" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"0" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"1" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"9" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"<" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"=" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}, true]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"?" 
=> ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"@" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 47, 0, 62>> + output = [["StartTag", "a", %{"�" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"\v" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"􀀀" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"`" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"a" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"b" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"y" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"z" => ""}]] + + result = + input + |> Tokenizer.tokenize() + 
|> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{"{" => ""}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a0", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a1", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a9", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a<", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a=", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a?", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a@", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "aa", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "ab", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "ay", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "az", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a[", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 0, 62>> + output = [["StartTag", "a�", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a\b", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", 
%{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a\v", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = <<60, 97, 31, 62>> + output = [["StartTag", <<97, 31>>, %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a􀀀", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a`", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "aa", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "ab", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "ay", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "az", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "a{", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "b", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "y", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["StartTag", "z", %{}]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <{" do + input = "<{" + output = [["Character", "<{"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 
=" do + input = "=" + output = [["Character", "="]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 >" do + input = ">" + output = [["Character", ">"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 ?" do + input = "?" + output = [["Character", "?"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 @" do + input = "@" + output = [["Character", "@"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part16_test.exs b/test/floki/html/generated/tokenizer/test3_part16_test.exs new file mode 100644 index 00000000..ce3e77c3 --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part16_test.exs @@ -0,0 +1,188 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part16Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 A" do + input = "A" + output = [["Character", "A"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 B" do + input = "B" + output = [["Character", "B"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Y" do + input = "Y" + output = [["Character", "Y"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Z" do + input = "Z" + output = [["Character", "Z"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 \\u0009" do + input = "\t" + output = [["Character", "\t"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 \\u000A" do + input = "\n" + output = [["Character", "\n"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 \\u000B" do + input = "\v" + output = [["Character", "\v"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 \\u000C" do + input = "\f" + output = [["Character", "\f"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 \\uDBC0\\uDC00" do + input = "􀀀" + output = [["Character", "􀀀"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 `" do + input = "`" + output = [["Character", "`"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 a" do + input = "a" + output = [["Character", 
"a"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 b" do + input = "b" + output = [["Character", "b"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 y" do + input = "y" + output = [["Character", "y"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 z" do + input = "z" + output = [["Character", "z"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 {" do + input = "{" + output = [["Character", "{"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end +end diff --git a/test/floki/html/generated/tokenizer/test3_part1_test.exs b/test/floki/html/generated/tokenizer/test3_part1_test.exs new file mode 100644 index 00000000..deefcc37 --- /dev/null +++ b/test/floki/html/generated/tokenizer/test3_part1_test.exs @@ -0,0 +1,1208 @@ +defmodule Floki.HTML.Generated.Tokenizer.Test3Part1Test do + use ExUnit.Case, async: true + + # NOTE: This file was generated by "mix generate_tokenizer_tests test3.test". + # html5lib-tests rev: e52ff68cc7113a6ef3687747fa82691079bf9cc5 + + alias Floki.HTML.Tokenizer + + test "tokenize/1 " do + input = " " + output = [["Character", " "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 !" do + input = "!" + output = [["Character", "!"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 \"" do + input = "\"" + output = [["Character", "\""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 %" do + input = "%" + output = [["Character", "%"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 &" do + input = "&" + output = [["Character", "&"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 '" do + input = "'" + output = [["Character", "'"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 ," do + input = "," + output = [["Character", ","]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 -" do + input = "-" + output = [["Character", "-"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 ." do + input = "." 
+ output = [["Character", "."]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 /" do + input = "/" + output = [["Character", "/"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 0" do + input = "0" + output = [["Character", "0"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 1" do + input = "1" + output = [["Character", "1"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 9" do + input = "9" + output = [["Character", "9"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 ;" do + input = ";" + output = [["Character", ";"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 <" do + input = "<" + output = [["Character", "<"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 < " do + input = "< " + output = [["Character", "< "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 > + output = [["Comment", " �"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["Comment", "-- "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["Comment", "-- "]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["Comment", "-- a"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["Comment", "--!"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + 
test "tokenize/1 " do + input = "" + output = [["Comment", ""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["Comment", "--!a"]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1 " do + input = "" + output = [["Comment", ""]] + + result = + input + |> Tokenizer.tokenize() + |> TokenizerTestLoader.tokenization_result() + + assert result.tokens == output + end + + test "tokenize/1