Tokenizer.php 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. <?php declare(strict_types = 1);
  2. namespace TheSeer\Tokenizer;
  3. class Tokenizer {
  4. /**
  5. * Token Map for "non-tokens"
  6. *
  7. * @var array
  8. */
  9. private $map = [
  10. '(' => 'T_OPEN_BRACKET',
  11. ')' => 'T_CLOSE_BRACKET',
  12. '[' => 'T_OPEN_SQUARE',
  13. ']' => 'T_CLOSE_SQUARE',
  14. '{' => 'T_OPEN_CURLY',
  15. '}' => 'T_CLOSE_CURLY',
  16. ';' => 'T_SEMICOLON',
  17. '.' => 'T_DOT',
  18. ',' => 'T_COMMA',
  19. '=' => 'T_EQUAL',
  20. '<' => 'T_LT',
  21. '>' => 'T_GT',
  22. '+' => 'T_PLUS',
  23. '-' => 'T_MINUS',
  24. '*' => 'T_MULT',
  25. '/' => 'T_DIV',
  26. '?' => 'T_QUESTION_MARK',
  27. '!' => 'T_EXCLAMATION_MARK',
  28. ':' => 'T_COLON',
  29. '"' => 'T_DOUBLE_QUOTES',
  30. '@' => 'T_AT',
  31. '&' => 'T_AMPERSAND',
  32. '%' => 'T_PERCENT',
  33. '|' => 'T_PIPE',
  34. '$' => 'T_DOLLAR',
  35. '^' => 'T_CARET',
  36. '~' => 'T_TILDE',
  37. '`' => 'T_BACKTICK'
  38. ];
  39. public function parse(string $source): TokenCollection {
  40. $result = new TokenCollection();
  41. if ($source === '') {
  42. return $result;
  43. }
  44. $tokens = token_get_all($source);
  45. $lastToken = new Token(
  46. $tokens[0][2],
  47. 'Placeholder',
  48. ''
  49. );
  50. foreach ($tokens as $pos => $tok) {
  51. if (is_string($tok)) {
  52. $token = new Token(
  53. $lastToken->getLine(),
  54. $this->map[$tok],
  55. $tok
  56. );
  57. $result->addToken($token);
  58. $lastToken = $token;
  59. continue;
  60. }
  61. $line = $tok[2];
  62. $values = preg_split('/\R+/Uu', $tok[1]);
  63. foreach ($values as $v) {
  64. $token = new Token(
  65. $line,
  66. token_name($tok[0]),
  67. $v
  68. );
  69. $result->addToken($token);
  70. $line++;
  71. $lastToken = $token;
  72. }
  73. }
  74. return $result;
  75. }
  76. }