[system] / trunk / wwmoodle / wwquestion / htmlparser.php Repository:
ViewVC logotype

View of /trunk/wwmoodle/wwquestion/htmlparser.php

Parent Directory | Revision Log


Revision 5156 - (download) (as text) (annotate)
Fri Jul 13 06:57:44 2007 UTC (12 years, 7 months ago) by mleventi
File size: 11405 byte(s)
Added support for dropdown fields in webwork questions

    1 <?php
    2 
    3 /*
    4  * Copyright (c) 2003 Jose Solorzano.  All rights reserved.
    5  * Redistribution of source must retain this copyright notice.
    6  *
    7  * Jose Solorzano (http://jexpert.us) is a software consultant.
    8  *
    9  * Contributions by:
   10  * - Leo West (performance improvements)
   11  */
   12 
   13 define ("NODE_TYPE_START",0);
   14 define ("NODE_TYPE_ELEMENT",1);
   15 define ("NODE_TYPE_ENDELEMENT",2);
   16 define ("NODE_TYPE_TEXT",3);
   17 define ("NODE_TYPE_COMMENT",4);
   18 define ("NODE_TYPE_DONE",5);
   19 
   20 /**
   21  * Class HtmlParser.
   22  * To use, create an instance of the class passing
   23  * HTML text. Then invoke parse() until it's false.
   24  * When parse() returns true, $iNodeType, $iNodeName
   25  * $iNodeValue and $iNodeAttributes are updated.
   26  *
   27  * To create an HtmlParser instance you may also
   28  * use convenience functions HtmlParser_ForFile
   29  * and HtmlParser_ForURL.
   30  */
   31 class HtmlParser {
   32 
   33     /**
   34      * Field iNodeType.
   35      * May be one of the NODE_TYPE_* constants above.
   36      */
   37     var $iNodeType;
   38 
   39     /**
   40      * Field iNodeName.
   41      * For elements, it's the name of the element.
   42      */
   43     var $iNodeName = "";
   44 
   45     /**
   46      * Field iNodeValue.
   47      * For text nodes, it's the text.
   48      */
   49     var $iNodeValue = "";
   50 
   51     /**
   52      * Field iNodeAttributes.
   53      * A string-indexed array containing attribute values
   54      * of the current node. Indexes are always lowercase.
   55      */
   56     var $iNodeAttributes;
   57 
   58     // The following fields should be
   59     // considered private:
   60 
   61     var $iHtmlText;
   62     var $iHtmlTextLength;
   63     var $iHtmlTextIndex = 0;
   64     var $iHtmlCurrentChar;
   65     var $BOE_ARRAY;
   66     var $B_ARRAY;
   67     var $BOS_ARRAY;
   68 
   69     /**
   70      * Constructor.
   71      * Constructs an HtmlParser instance with
   72      * the HTML text given.
   73      */
   74     function HtmlParser ($aHtmlText) {
   75         $this->iHtmlText = $aHtmlText;
   76         $this->iHtmlTextLength = strlen($aHtmlText);
   77         $this->iNodeAttributes = array();
   78         $this->setTextIndex (0);
   79 
   80         $this->BOE_ARRAY = array (" ", "\t", "\r", "\n", "=" );
   81         $this->B_ARRAY = array (" ", "\t", "\r", "\n" );
   82         $this->BOS_ARRAY = array (" ", "\t", "\r", "\n", "/" );
   83     }
   84 
   85     /**
   86      * Method parse.
   87      * Parses the next node. Returns false only if
   88      * the end of the HTML text has been reached.
   89      * Updates values of iNode* fields.
   90      */
   91     function parse() {
   92         $text = $this->skipToElement();
   93         if ($text != "") {
   94             $this->iNodeType = NODE_TYPE_TEXT;
   95             $this->iNodeName = "Text";
   96             $this->iNodeValue = $text;
   97             return true;
   98         }
   99         return $this->readTag();
  100     }
  101     function printTag() {
  102         if($this->iNodeType == NODE_TYPE_ELEMENT) {
  103             $temp = "<";
  104             $temp .= $this->iNodeName;
  105             if(isset($this->iNodeAttributes['selected'])) {
  106                 $temp .= " selected";
  107             }
  108             foreach($this->iNodeAttributes as $key => $value) {
  109                 if($key == 'selected') {
  110                 } else {
  111                     $temp .= " " . $key . "=" . '"' . $value . '"';
  112                 }
  113             }
  114             $temp .= ">";
  115         } else if($this->iNodeType == NODE_TYPE_ENDELEMENT) {
  116             $temp = "</" . $this->iNodeName . ">";
  117         } else if($this->iNodeType == NODE_TYPE_TEXT) {
  118             $temp = $this->iNodeValue;
  119         } else {
  120         }
  121         return $temp;
  122 
  123 
  124     }
  125 
  126     function clearAttributes() {
  127         $this->iNodeAttributes = array();
  128     }
  129 
  130     function readTag() {
  131         if ($this->iCurrentChar != "<") {
  132             $this->iNodeType = NODE_TYPE_DONE;
  133             return false;
  134         }
  135         $this->clearAttributes();
  136         $this->skipMaxInTag ("<", 1);
  137         if ($this->iCurrentChar == '/') {
  138             $this->moveNext();
  139             $name = $this->skipToBlanksInTag();
  140             $this->iNodeType = NODE_TYPE_ENDELEMENT;
  141             $this->iNodeName = $name;
  142             $this->iNodeValue = "";
  143             $this->skipEndOfTag();
  144             return true;
  145         }
  146         $name = $this->skipToBlanksOrSlashInTag();
  147         if (!$this->isValidTagIdentifier ($name)) {
  148                 $comment = false;
  149                 if (strpos($name, "!--") === 0) {
  150                     $ppos = strpos($name, "--", 3);
  151                     if (strpos($name, "--", 3) === (strlen($name) - 2)) {
  152                         $this->iNodeType = NODE_TYPE_COMMENT;
  153                         $this->iNodeName = "Comment";
  154                         $this->iNodeValue = "<" . $name . ">";
  155                         $comment = true;
  156                     }
  157                     else {
  158                         $rest = $this->skipToStringInTag ("-->");
  159                         if ($rest != "") {
  160                             $this->iNodeType = NODE_TYPE_COMMENT;
  161                             $this->iNodeName = "Comment";
  162                             $this->iNodeValue = "<" . $name . $rest;
  163                             $comment = true;
  164                             // Already skipped end of tag
  165                             return true;
  166                         }
  167                     }
  168                 }
  169                 if (!$comment) {
  170                     $this->iNodeType = NODE_TYPE_TEXT;
  171                     $this->iNodeName = "Text";
  172                     $this->iNodeValue = "<" . $name;
  173                     return true;
  174                 }
  175         }
  176         else {
  177                 $this->iNodeType = NODE_TYPE_ELEMENT;
  178                 $this->iNodeValue = "";
  179                 $this->iNodeName = $name;
  180                 while ($this->skipBlanksInTag()) {
  181                     $attrName = $this->skipToBlanksOrEqualsInTag();
  182                     if ($attrName != "" && $attrName != "/") {
  183                         $this->skipBlanksInTag();
  184                         if ($this->iCurrentChar == "=") {
  185                             $this->skipEqualsInTag();
  186                             $this->skipBlanksInTag();
  187                             $value = $this->readValueInTag();
  188                             $this->iNodeAttributes[strtolower($attrName)] = $value;
  189                         }
  190                         else {
  191                             $this->iNodeAttributes[strtolower($attrName)] = "";
  192                         }
  193                     }
  194                 }
  195         }
  196         $this->skipEndOfTag();
  197         return true;
  198     }
  199 
  200     function isValidTagIdentifier ($name) {
  201         return ereg ("^[A-Za-z0-9_\\-]+$", $name);
  202     }
  203 
  204     function skipBlanksInTag() {
  205         return "" != ($this->skipInTag ($this->B_ARRAY));
  206     }
  207 
  208     function skipToBlanksOrEqualsInTag() {
  209         return $this->skipToInTag ($this->BOE_ARRAY);
  210     }
  211 
  212     function skipToBlanksInTag() {
  213         return $this->skipToInTag ($this->B_ARRAY);
  214     }
  215 
  216     function skipToBlanksOrSlashInTag() {
  217         return $this->skipToInTag ($this->BOS_ARRAY);
  218     }
  219 
  220     function skipEqualsInTag() {
  221         return $this->skipMaxInTag ("=", 1);
  222     }
  223 
  224     function readValueInTag() {
  225         $ch = $this->iCurrentChar;
  226         $value = "";
  227         if ($ch == "\"") {
  228             $this->skipMaxInTag ("\"", 1);
  229             $value = $this->skipToInTag ("\"");
  230             $this->skipMaxInTag ("\"", 1);
  231         }
  232         else if ($ch == "'") {
  233             $this->skipMaxInTag ("'", 1);
  234             $value = $this->skipToInTag ("'");
  235             $this->skipMaxInTag ("'", 1);
  236         }
  237         else {
  238             $value = $this->skipToBlanksInTag();
  239         }
  240         return $value;
  241     }
  242 
  243     function setTextIndex ($index) {
  244         $this->iHtmlTextIndex = $index;
  245         if ($index >= $this->iHtmlTextLength) {
  246             $this->iCurrentChar = -1;
  247         }
  248         else {
  249             $this->iCurrentChar = $this->iHtmlText{$index};
  250         }
  251     }
  252 
  253     function moveNext() {
  254         if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
  255             $this->setTextIndex ($this->iHtmlTextIndex + 1);
  256             return true;
  257         }
  258         else {
  259             return false;
  260         }
  261     }
  262 
  263     function skipEndOfTag() {
  264         while (($ch = $this->iCurrentChar) !== -1) {
  265             if ($ch == ">") {
  266                 $this->moveNext();
  267                 return;
  268             }
  269             $this->moveNext();
  270         }
  271     }
  272 
  273     function skipInTag ($chars) {
  274         $sb = "";
  275         while (($ch = $this->iCurrentChar) !== -1) {
  276             if ($ch == ">") {
  277                 return $sb;
  278             } else {
  279                 $match = false;
  280                 for ($idx = 0; $idx < count($chars); $idx++) {
  281                     if ($ch == $chars[$idx]) {
  282                         $match = true;
  283                         break;
  284                     }
  285                 }
  286                 if (!$match) {
  287                     return $sb;
  288                 }
  289                 $sb .= $ch;
  290                 $this->moveNext();
  291             }
  292         }
  293         return $sb;
  294     }
  295 
  296     function skipMaxInTag ($chars, $maxChars) {
  297         $sb = "";
  298         $count = 0;
  299         while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) {
  300             if ($ch == ">") {
  301                 return $sb;
  302             } else {
  303                 $match = false;
  304                 for ($idx = 0; $idx < count($chars); $idx++) {
  305                     if ($ch == $chars[$idx]) {
  306                         $match = true;
  307                         break;
  308                     }
  309                 }
  310                 if (!$match) {
  311                     return $sb;
  312                 }
  313                 $sb .= $ch;
  314                 $this->moveNext();
  315             }
  316         }
  317         return $sb;
  318     }
  319 
  320     function skipToInTag ($chars) {
  321         $sb = "";
  322         while (($ch = $this->iCurrentChar) !== -1) {
  323             $match = $ch == ">";
  324             if (!$match) {
  325                 for ($idx = 0; $idx < count($chars); $idx++) {
  326                     if ($ch == $chars[$idx]) {
  327                         $match = true;
  328                         break;
  329                     }
  330                 }
  331             }
  332             if ($match) {
  333                 return $sb;
  334             }
  335             $sb .= $ch;
  336             $this->moveNext();
  337         }
  338         return $sb;
  339     }
  340 
  341     function skipToElement() {
  342         $sb = "";
  343         while (($ch = $this->iCurrentChar) !== -1) {
  344             if ($ch == "<") {
  345                 return $sb;
  346             }
  347             $sb .= $ch;
  348             $this->moveNext();
  349         }
  350         return $sb;
  351     }
  352 
  353     /**
  354      * Returns text between current position and $needle,
  355      * inclusive, or "" if not found. The current index is moved to a point
  356      * after the location of $needle, or not moved at all
  357      * if nothing is found.
  358      */
  359     function skipToStringInTag ($needle) {
  360         $pos = strpos ($this->iHtmlText, $needle, $this->iHtmlTextIndex);
  361         if ($pos === false) {
  362             return "";
  363         }
  364         $top = $pos + strlen($needle);
  365         $retvalue = substr ($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
  366         $this->setTextIndex ($top);
  367         return $retvalue;
  368     }
  369 }
  370 
  371 function HtmlParser_ForFile ($fileName) {
  372     return HtmlParser_ForURL($fileName);
  373 }
  374 
  375 function HtmlParser_ForURL ($url) {
  376     $fp = fopen ($url, "r");
  377     $content = "";
  378     while (true) {
  379         $data = fread ($fp, 8192);
  380         if (strlen($data) == 0) {
  381             break;
  382         }
  383         $content .= $data;
  384     }
  385     fclose ($fp);
  386     return new HtmlParser ($content);
  387 }
  388 
  389 ?>

aubreyja at gmail dot com
ViewVC Help
Powered by ViewVC 1.0.9