PHP Classes

File: lib/Haanga/Compiler/Tokenizer.php

Recommend this page to a friend!
  Classes of Cesar D. Rodas  >  Haanga  >  lib/Haanga/Compiler/Tokenizer.php  >  Download  
File: lib/Haanga/Compiler/Tokenizer.php
Role: Unit test script
Content type: text/plain
Description: Unit test script
Class: Haanga
Template engine to process Django style templates
Author: By
Last change: Improving Haanga

1. `phpunit` friendly
2. Improved {% load %} logic
Merge branch 'develop' into feature/constant-filters
added conditional expressions
Fixed issue #15
Added filters for constants
Date: 3 years ago
Size: 20,055 bytes
 

Contents

Class file image Download
<?php
/*
  +---------------------------------------------------------------------------------+
  | Copyright (c) 2010 César Rodas and Menéame Comunicacions S.L.                   |
  +---------------------------------------------------------------------------------+
  | Redistribution and use in source and binary forms, with or without              |
  | modification, are permitted provided that the following conditions are met:     |
  | 1. Redistributions of source code must retain the above copyright               |
  |    notice, this list of conditions and the following disclaimer.                |
  |                                                                                 |
  | 2. Redistributions in binary form must reproduce the above copyright            |
  |    notice, this list of conditions and the following disclaimer in the          |
  |    documentation and/or other materials provided with the distribution.         |
  |                                                                                 |
  | 3. All advertising materials mentioning features or use of this software        |
  |    must display the following acknowledgement:                                  |
  |    This product includes software developed by César D. Rodas.                  |
  |                                                                                 |
  | 4. Neither the name of the César D. Rodas nor the                               |
  |    names of its contributors may be used to endorse or promote products         |
  |    derived from this software without specific prior written permission.        |
  |                                                                                 |
  | THIS SOFTWARE IS PROVIDED BY CÉSAR D. RODAS ''AS IS'' AND ANY                   |
  | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED       |
  | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          |
  | DISCLAIMED. IN NO EVENT SHALL CÉSAR D. RODAS BE LIABLE FOR ANY                  |
  | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES      |
  | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;    |
  | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND     |
  | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT      |
  | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS   |
  | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE                     |
  +---------------------------------------------------------------------------------+
  | Authors: César Rodas <crodas@php.net>                                           |
  +---------------------------------------------------------------------------------+
*/

class HG_Parser extends Haanga_Compiler_Parser
{
    /*
     * Intentionally empty: a short alias of Haanga_Compiler_Parser so the
     * tokenizer can refer to the parser's T_* token constants concisely.
     */
}


/**
 *  Hand-written Tokenizer class inspired by SQLite's tokenize.c
 *
 */
class Haanga_Compiler_Tokenizer
{
    /**
     * Reserved words recognised inside tags, mapped to parser token ids.
     *
     * Matching is case sensitive. The entries MUST stay sorted in ascending
     * byte order: getTag() stops scanning as soon as the input compares
     * lower than the current keyword.
     */
    static $keywords = array(
        'AND'           => HG_Parser::T_AND,
        'FALSE'         => HG_Parser::T_FALSE,
        'NOT'           => HG_Parser::T_NOT,
        'OR'            => HG_Parser::T_OR,
        'TRUE'          => HG_Parser::T_TRUE,
        '_('            => HG_Parser::T_INTL,
        'as'            => HG_Parser::T_AS,
        'autoescape'    => HG_Parser::T_AUTOESCAPE,
        'block'         => HG_Parser::T_BLOCK,
        'by'            => HG_Parser::T_BY,
        'else'          => HG_Parser::T_ELSE,
        'empty'         => HG_Parser::T_EMPTY,
        'extends'       => HG_Parser::T_EXTENDS,
        'filter'        => HG_Parser::T_FILTER,
        'for'           => HG_Parser::T_FOR,
        'if'            => HG_Parser::T_IF,
        'ifchanged'     => HG_Parser::T_IFCHANGED,
        'ifequal'       => HG_Parser::T_IFEQUAL,
        'ifnotequal'    => HG_Parser::T_IFNOTEQUAL,
        'in'            => HG_Parser::T_IN,
        'include'       => HG_Parser::T_INCLUDE,
        'load'          => HG_Parser::T_LOAD,
        'not'           => HG_Parser::T_NOT,
        'regroup'       => HG_Parser::T_REGROUP,
        'set'           => HG_Parser::T_SET,
        'spacefull'     => HG_Parser::T_SPACEFULL,
        'step'          => HG_Parser::T_STEP,
        'with'          => HG_Parser::T_WITH,
    );

    /**
     * Single-character operators, mapped to parser token ids.
     *
     * Listed in ascending byte order. Consulted by getOperator() only after
     * no multi-character operator matched.
     */
    static $operators_single = array(
        '!'     => HG_Parser::T_NOT,
        '%'     => HG_Parser::T_MOD,
        '&'     => HG_Parser::T_BITWISE,
        '('     => HG_Parser::T_LPARENT,
        ')'     => HG_Parser::T_RPARENT,
        '*'     => HG_Parser::T_TIMES,
        '+'     => HG_Parser::T_PLUS,
        ','     => HG_Parser::T_COMMA,
        '-'     => HG_Parser::T_MINUS,
        '.'     => HG_Parser::T_DOT,
        '/'     => HG_Parser::T_DIV,
        ':'     => HG_Parser::T_COLON,
        '<'     => HG_Parser::T_LT,
        '='     => HG_Parser::T_ASSIGN,
        '>'     => HG_Parser::T_GT,
        '?'     => HG_Parser::T_QUESTION,
        '['     => HG_Parser::T_BRACKETS_OPEN,
        ']'     => HG_Parser::T_BRACKETS_CLOSE,
        '|'     => HG_Parser::T_FILTER_PIPE,
    );

    /**
     * Multi-character operators, mapped to parser token ids.
     *
     * getOperator() returns the first entry that matches, so a longer
     * operator must be listed before any operator that is a prefix of it
     * (`!==` before `!=`, `===` before `==`).
     */
    static $operators = array(
        '!=='   => HG_Parser::T_NE,
        '!='    => HG_Parser::T_NE,
        '&&'    => HG_Parser::T_AND,
        '->'    => HG_Parser::T_OBJ,
        '..'    => HG_Parser::T_DOTDOT,
        '::'    => HG_Parser::T_CLASS,
        '<<'    => HG_Parser::T_BITWISE,
        '<='    => HG_Parser::T_LE,
        '==='   => HG_Parser::T_EQ,
        '=='    => HG_Parser::T_EQ,
        '>='    => HG_Parser::T_GE,
        '>>'    => HG_Parser::T_BITWISE,
        '||'    => HG_Parser::T_PIPE,
    );

    /**
     * Closing delimiter => token id. Rebuilt by the constructor from
     * $end_tag and $end_print, and consulted first by getTag().
     */
    static $close_tags = array();

    /* Template delimiters; the constructor reads them to build the tag maps. */
    static $open_tag     = "{%";
    static $end_tag      = "%}";
    static $open_comment = "{#";
    static $end_comment  = "#}";
    static $open_print   = "{{";
    static $end_print    = "}}";

    /** Opening delimiter => token id; built by the constructor. */
    public $open_tags;
    /** Text of the token produced by the last yylex() call. */
    public $value;
    /** Token id produced by the last yylex() call (NULL when none). */
    public $token;
    /** Current scanner state, one of the IN_* constants below. */
    public $status = self::IN_NONE;

    /*
     * The properties below are all assigned by the constructor. They are
     * declared explicitly because creating undeclared ("dynamic") properties
     * is deprecated as of PHP 8.2. Defaults mirror the constructor's values.
     */
    /** Full template source being tokenized. */
    public $data;
    /** Compiler instance handed to the constructor. */
    public $compiler;
    /** Template file name, used in error messages. */
    public $file;
    /** Current line number (1-based), used in error messages. */
    public $line = 1;
    /** Byte offset of the scanner cursor inside $data. */
    public $N = 0;
    /** Length of $data in bytes. */
    public $length = 0;

    /* Scanner states */
    const IN_NONE    = 0;   /* between tokens: next bytes may open a tag */
    const IN_HTML    = 1;   /* inside literal template text              */
    const IN_TAG     = 2;   /* inside the tag delimiters                 */
    const IN_ECHO    = 3;   /* inside the print delimiters               */

    /** Whether a token has already been produced inside the current print block. */
    protected $echoFirstToken = false;

    /**
     *  Prepare a tokenizer for one template source.
     *
     *  Resets the cursor and line counter and (re)builds the delimiter
     *  lookup maps from the static delimiter settings.
     *
     *  @param string $data     Template source to tokenize
     *  @param mixed  $compiler Compiler instance; only stored here
     *  @param string $file     Template file name, used in error messages
     */
    public function __construct($data, $compiler, $file)
    {
        $this->data     = $data;
        $this->compiler = $compiler;
        $this->line     = 1;
        $this->N        = 0;
        $this->file     = $file;
        $this->length   = strlen($data);

        /* delimiters that can start a construct while scanning plain text */
        $this->open_tags = array(
            self::$open_tag     => HG_Parser::T_TAG_OPEN,
            self::$open_print   => HG_Parser::T_PRINT_OPEN,
            self::$open_comment => HG_Parser::T_COMMENT,
        );

        /* delimiters that terminate a tag or a print block */
        self::$close_tags = array(
            self::$end_tag   => HG_Parser::T_TAG_CLOSE,
            self::$end_print => HG_Parser::T_PRINT_CLOSE,
        );
    }

    /**
     *  Produce the next token.
     *
     *  On success the token id is left in $this->token and its text in
     *  $this->value.
     *
     *  @return bool TRUE when a token was produced, FALSE at end of input
     *
     *  @throws Haanga_Compiler_Exception when the input ends inside a tag,
     *          a print block or an unterminated comment
     */
    public function yylex()
    {
        $this->token = NULL;

        /* a cursor at (or beyond) the end of the source means end of input */
        if ($this->N >= $this->length) {
            if ($this->status != self::IN_NONE && $this->status != self::IN_HTML) {
                $this->Error("Unexpected end");
            }
            return FALSE;
        }

        if ($this->status == self::IN_NONE) {
            $i    = &$this->N;
            $data = substr($this->data, $i, 12);

            foreach ($this->open_tags as $value => $token) {
                $len = strlen($value);
                if (strncmp($data, $value, $len) !== 0) {
                    continue;
                }

                $this->value  = $value;
                $this->token  = $token;
                $i += $len;

                switch ($this->token) {
                case HG_Parser::T_TAG_OPEN:
                    $this->status = self::IN_TAG;
                    break;
                case HG_Parser::T_COMMENT:
                    $pos = strpos($this->data, self::$end_comment, $i);
                    if ($pos === FALSE) {
                        $this->Error("unexpected end");
                    }

                    /* substr() takes a length, not an end offset */
                    $this->value  = substr($this->data, $i, $pos - $i);
                    /* keep error line numbers right after multi-line comments */
                    $this->line  += substr_count($this->value, "\n");
                    $this->status = self::IN_NONE;
                    $i = $pos + strlen(self::$end_comment);
                    break;
                case HG_Parser::T_PRINT_OPEN:
                    $this->status = self::IN_ECHO;
                    $this->echoFirstToken = false;
                    break;
                }
                return TRUE;
            }

            /* no delimiter at the cursor: what follows is literal text */
            $this->status = self::IN_HTML;
        }

        if ($this->status == self::IN_TAG || $this->status == self::IN_ECHO) {
            $this->yylex_main();
        } else {
            $this->yylex_html();
        }

        if (empty($this->token)) {
            if ($this->status != self::IN_NONE && $this->status != self::IN_HTML) {
                $this->Error("Unexpected end");
            }
            return FALSE;
        }

        return TRUE;
    }

    /**
     *  Consume literal template text up to the nearest opening delimiter
     *  (or up to the end of the source when none is left) and emit it as
     *  a T_HTML token. Newlines in the consumed text are added to the
     *  line counter.
     */
    public function yylex_html()
    {
        $source = &$this->data;
        $start  = $this->N;

        /* nearest occurrence of any opening delimiter, FALSE when none */
        $nearest = FALSE;
        foreach ($this->open_tags as $marker => $unused) {
            $found = strpos($source, $marker, $start);
            if ($found !== FALSE && ($nearest === FALSE || $found < $nearest)) {
                $nearest = $found;
            }
        }

        if ($nearest === FALSE) {
            /* everything that is left is plain text */
            $this->value = substr($source, $start);
            $this->token = HG_Parser::T_HTML;
            $this->N     = $this->length;
        } else {
            /* stop right before the delimiter and let yylex() pick it up */
            $this->value  = substr($source, $start, $nearest - $start);
            $this->token  = HG_Parser::T_HTML;
            $this->status = self::IN_NONE;
            $this->N      = $nearest;
        }

        $this->line += substr_count($this->value, "\n");
    }


    /**
     *  Scan the next token while inside a tag or a print block.
     *
     *  The cursor $this->N is advanced in place (it is aliased as $i) and
     *  the result is stored in $this->token / $this->value. Recognised, in
     *  order: quoted strings, numbers, whitespace (skipped, except that
     *  whitespace followed by a single dot yields T_CONCAT), then closing
     *  delimiters / keywords, operators and identifiers.
     *
     *  When a closing delimiter is produced the state goes back to IN_NONE.
     *
     *  Errors are raised through $this->Error() for unclosed strings,
     *  malformed numbers and unrecognised input.
     */
    function yylex_main()
    {
        $data = &$this->data;

        /* $i aliases the cursor; the loop ends as soon as a token is set */
        for ($i=&$this->N; is_null($this->token) && $i < $this->length; ++$i) {
            switch ($data[$i]) {

            /* strings {{{ */
            case '"':
            case "'":
                $end   = $data[$i]; /* the closing quote must match the opening one */
                $value = "";
                while ($data[++$i] != $end) {
                    switch ($data[$i]) {
                    case "\\":
                        /* escape sequence: \n and \t are translated, any */
                        /* other escaped byte is copied literally         */
                        switch ($data[++$i]) {
                        case "n":
                            $value .= "\n";
                            break;
                        case "t":
                            $value .= "\t";
                            break;
                        default:
                            $value .= $data[$i];
                        }
                        break;
                    case $end:
                        --$i;
                        break 2;
                    default:
                        if ($data[$i] == "\n") {
                            $this->line++;
                        }
                        $value .= $data[$i];
                    }
                    /* the next byte must exist, otherwise the quote never closes */
                    if (!isset($data[$i+1])) {
                        $this->Error("unclosed string");
                    }
                }
                /* $i now sits on the closing quote; the outer ++$i steps over it */
                $this->value = $value;
                $this->token = HG_Parser::T_STRING;
                break;
            /* }}} */

            /* number {{{ */
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9': 
                $value = "";
                $dot   = FALSE; /* whether a decimal point was already seen */
                /* $e counts the bytes collected into $value */
                for ($e=0; $i < $this->length; ++$e, ++$i) {
                    switch ($data[$i]) {
                    case '0': case '1': case '2': case '3': case '4': 
                    case '5': case '6': case '7': case '8': case '9': 
                        $value .= $data[$i];
                        break;
                    case '.':
                        if (!$dot) {
                            $value .= ".";
                            $dot    = TRUE;
                        } else {
                            $this->error("Invalid number");
                        }
                        break;
                    default: 
                        break 2; /* end of the number: leave this scanning loop */
                    }
                }
                /* reject a number directly followed by an identifier */
                /* character, and a number that ends with a dot        */
                if (!$this->is_token_end($data[$i]) &&
                    !isset(self::$operators_single[$data[$i]]) || $value[$e-1] == '.') {
                    $this->error("Unexpected '{$data[$i]}'");
                }
                $this->value = $value;
                $this->token = HG_Parser::T_NUMERIC;
                /* leave the switch and the outer loop without ++$i: */
                /* $i already points at the byte after the number    */
                break 2;
            /* }}} */

            case "\n": case " ": case "\t": case "\r": case "\f":
                for (; is_null($this->token) && $i < $this->length; ++$i) {
                    switch ($data[$i]) {
                    case "\n":
                        $this->line++;
                        /* intentional fall-through */
                    case " ": case "\t": case "\r": case "\f":
                        break;
                    case '.':
                        /* whitespace + single dot is the concat operator */
                        if ($data[$i+1] != '.') {
                            $this->token = HG_Parser::T_CONCAT;
                            $this->value = '.';
                            $i++;
                            return;
                        }
                        /* ".." : intentional fall-through */
                    default:
                        /* leave the whitespace loop and step back one  */
                        /* byte, so that the outer loop's ++$i lands on */
                        /* this non-whitespace byte                     */
                        --$i;  
                        break 2; 
                    }
                }
                break; /* whitespaces are ignored */
            default: 
                /* getTag()/getOperator() set the token and move the cursor */
                /* themselves when they succeed                             */
                if (!$this->getTag() && !$this->getOperator()) {
                    $alpha = $this->getAlpha();
                    if ($alpha === FALSE) {
                        $this->error("error: unexpected ".substr($data, $i));
                    }
                    /* tag extension registry, fetched once and reused */
                    static $tag=NULL;
                    if (!$tag) {
                        $tag = Haanga_Extension::getInstance('Tag');
                    }

                    if ($this->status == self::IN_ECHO && !$this->echoFirstToken) {
                        /* the first identifier of a print block is */
                        /* never looked up as a tag name            */
                        $this->token =  HG_Parser::T_ALPHA;
                    } else {
                        /* a truthy isValid() result is used as the token id */
                        $value = $tag->isValid($alpha);
                        $this->token = $value ? $value : HG_Parser::T_ALPHA;
                    }
                    $this->value = $alpha;

                }
                /* the cursor was already advanced by the helper: skip ++$i */
                break 2;
            }
        }
    
        if ($this->status == self::IN_ECHO) {
            $this->echoFirstToken = true;
        }

        /* a closing delimiter ends the tag / print block */
        if ($this->token == HG_Parser::T_TAG_CLOSE ||
            $this->token == HG_Parser::T_PRINT_CLOSE) {
            $this->status = self::IN_NONE;
        }

    }

    /**
     *  Try to read a closing delimiter, a keyword or a custom "end..."
     *  word at the cursor.
     *
     *  On success $this->token and $this->value are set and the cursor is
     *  moved past the match.
     *
     *  @return bool TRUE when something was matched, FALSE otherwise
     */
    public function getTag()
    {
        $i    = &$this->N;
        /* look-ahead window; long enough for the longest keyword plus one byte */
        $data = substr($this->data, $i, 12);

        foreach (self::$close_tags as $value => $token) {
            $len = strlen($value);
            if (strncmp($data, $value, $len) === 0) {
                $this->token = $token;
                $this->value = $value;
                $i += $len;
                return TRUE;
            }
        }

        foreach (self::$keywords as $value => $token) {
            $len = strlen($value);
            $cmp = strncmp($data, $value, $len);

            if ($cmp < 0) {
                /* $keywords is sorted: once the input sorts before the   */
                /* current keyword, no later keyword can match. Test the  */
                /* sign only: strncmp() guarantees exactly -1 since 8.2.  */
                break;
            }
            if ($cmp > 0) {
                continue;
            }
            if (isset($data[$len]) && !$this->is_token_end($data[$len])) {
                /* probably a variable name TRUEfoo (and not TRUE) */
                continue;
            }

            $this->token = $token;
            $this->value = $value;
            $i += $len;
            return TRUE;
        }

        /* /end([a-zA-Z][a-zA-Z0-9]*)/ */
        if (strncmp($data, "end", 3) === 0) {
            $this->value = $this->getAlpha();
            $this->token = HG_Parser::T_CUSTOM_END;
            return TRUE;
        }

        return FALSE;
    }

    /**
     *  Abort tokenizing with an exception whose message carries the
     *  template file name and the current line number.
     *
     *  @param string $text Description of the problem
     *
     *  @throws Haanga_Compiler_Exception always
     */
    public function Error($text)
    {
        $location = $this->file.":".$this->line;

        throw new Haanga_Compiler_Exception("{$text} in {$location}");
    }

    /**
     *  Try to read an operator at the cursor.
     *
     *  Multi-character operators are tried first, in table order (longer
     *  operators are listed before their prefixes, so the longest match
     *  wins), then single-character operators.
     *
     *  On success $this->token and $this->value are set and the cursor is
     *  moved past the operator.
     *
     *  @return bool TRUE when an operator was matched, FALSE otherwise
     */
    public function getOperator()
    {
        $i = &$this->N;

        if (!isset($this->data[$i])) {
            /* nothing left to read */
            return FALSE;
        }

        $data = substr($this->data, $i, 12);

        /* Every entry is tried: an early exit based on the sign of      */
        /* strncmp() is not safe here because the table is deliberately  */
        /* not in strict byte order ('!==' precedes '!=').               */
        foreach (self::$operators as $value => $token) {
            $len = strlen($value);
            if (strncmp($data, $value, $len) === 0) {
                $this->token = $token;
                $this->value = $value;
                $i += $len;
                return TRUE;
            }
        }

        $char = $this->data[$i];
        if (isset(self::$operators_single[$char])) {
            $this->token = self::$operators_single[$char];
            $this->value = $char;
            $i += 1;
            return TRUE;
        }

        return FALSE;
    }


    /**
     *  Return TRUE if $letter is a valid "token_end". We use token_end
     *  to avoid confuse T_ALPHA TRUEfoo with TRUE and foo (T_ALPHA)
     *
     *  @param string $letter
     *
     *  @return bool
     */
    protected function is_token_end($letter)
    {
        /* anything matching [a-zA-Z0-9_] continues a word */
        $lower = $letter >= 'a' && $letter <= 'z';
        $upper = $letter >= 'A' && $letter <= 'Z';
        $digit = $letter >= '0' && $letter <= '9';

        if ($lower || $upper || $digit || $letter == "_") {
            return FALSE;
        }

        return TRUE;
    }

    /**
     *  Read an identifier, [a-zA-Z_][a-zA-Z0-9_]*, at the cursor and move
     *  the cursor past it.
     *
     *  @return string|bool The identifier, or FALSE (cursor untouched) when
     *                      the byte at the cursor cannot start one
     */
    public function getAlpha()
    {
        $letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_';
        $source  = &$this->data;
        $cursor  = &$this->N;

        /* the first byte must be a letter or an underscore */
        if (strspn($source, $letters, $cursor, 1) !== 1) {
            return FALSE;
        }

        /* length of the run of word bytes starting at the cursor */
        $run   = strspn($source, $letters . '0123456789', $cursor);
        $value = substr($source, $cursor, $run);
        $cursor += $run;

        return $value;
    }

    /**
     *  Current line number of the scanner, as maintained while tokenizing.
     *
     *  @return int
     */
    public function getLine()
    {
        return $this->line;
    }


    /**
     *  Tokenize and parse a template in one go.
     *
     *  Every token produced by the tokenizer is fed to a
     *  Haanga_Compiler_Parser; the parser's resulting body is returned.
     *
     *  @param string $template Template source
     *  @param mixed  $compiler Compiler instance, made available to the
     *                          tokenizer and the parser
     *  @param string $file     Template file name, used in error messages
     *
     *  @return array The parser's body, cast to an array
     *
     *  @throws Exception whatever the tokenizer or the parser raised
     */
    static function init($template, $compiler, $file='')
    {
        $lexer  = new self($template, $compiler, $file);
        $parser = new Haanga_Compiler_Parser($lexer, $file);

        $parser->compiler = $compiler;

        try {
            while ($lexer->yylex()) {
                $parser->doParse($lexer->token, $lexer->value);
            }
        } catch (Exception $e) {
            /* destroy the parser; a failure while doing so must not */
            /* hide the original error                               */
            try {
                $parser->doParse(0, 0);
            } catch (Exception $y) {}
            throw $e; /* re-throw exception */
        }

        /* token 0 signals end of input to the parser */
        $parser->doParse(0, 0);

        return (array)$parser->body;
    }
}
For more information send a message to info at phpclasses dot org.