1 /* Copyright (C) 2000-2004 Thomas Bopp, Thorsten Hampel, Ludger Merkens
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 * $Id: htmllib.pmod,v 1.1 2008/03/31 13:39:57 exodusd Exp $
19 inherit "AbstractCallbacks";
23 #include <attributes.h>
28 //#define KEEP_UTF // keep the UTF8 converted encoding for output
// Builds an HTML anchor for obj. The attributes of the opening <a> tag come
// from href_link_navigate(obj), which is called without the prefix; the
// link text is prefix followed by the object's identifier.
// @param obj     object to link to; get_identifier() is called on it
// @param prefix  optional text placed in front of the identifier; any
//                non-string value is treated as ""
// NOTE(review): the return expression continues on a line that is not part
// of this listing (presumably the closing </a>) - confirm.
32 string ahref_link_navigate(object obj, void|string prefix)
34 if ( !stringp(prefix) ) prefix = "";
35 return "<a "+href_link_navigate(obj)+">"+prefix+obj->get_identifier()+
// Builds an  href="..."  attribute string for obj, with postfix appended to
// the link target.
// @param obj      object to link to
// @param prefix   text placed before the identifier (not used for exits)
// @param postfix  text appended to the target in both cases
// NOTE(review): the declarations of href/dest/path, the branch separating
// the two assignments and the return are not part of this listing.
39 string href_link_navigate_postfix(object obj, string prefix, string postfix)
// non-string prefix/postfix values are treated as empty strings
45 if (!stringp(prefix)) prefix="";
46 if (!stringp(postfix))postfix="";
// objects whose class has the CLASS_EXIT bit link to the filename that the
// "filepath:tree" module reports for their exit destination
48 if ( obj->get_object_class() & CLASS_EXIT ) {
49 dest = obj->get_exit();
50 path = get_module("filepath:tree")->object_to_filename(dest);
51 href = "href=\""+path+postfix+"\"";
// link made of prefix + identifier (passed through replace_uml) + postfix
54 href = "href=\""+prefix+replace_uml(obj->get_identifier())+postfix+"\"";
// Builds an  href="..."  attribute string for obj.
// @param obj     object to link to
// @param prefix  optional text placed before the identifier; any non-string
//                value is treated as "" (not used for exits)
// NOTE(review): the declarations of href/dest/path, the branch separating
// the two assignments and the return are not part of this listing.
58 string href_link_navigate(object obj, void|string prefix)
64 if ( !stringp(prefix) ) prefix = "";
// objects whose class has the CLASS_EXIT bit link to the filename that the
// "filepath:tree" module reports for their exit destination
66 if ( obj->get_object_class() & CLASS_EXIT ) {
67 dest = obj->get_exit();
68 path = get_module("filepath:tree")->object_to_filename(dest);
69 href = "href=\""+path+"\"";
// link made of prefix + identifier (passed through replace_uml)
72 href = "href=\""+prefix+replace_uml(obj->get_identifier())+"\"";
/**
 * Render an opening tag with the given attributes.
 *
 * Every entry of attrs is emitted as  key="value"  in the order returned by
 * indices(attrs). Values are inserted verbatim, without any escaping.
 *
 * @param name   the tag name
 * @param attrs  mapping of attribute names to attribute values
 * @return the tag text, e.g. <a href="x">
 */
string create_tag(string name, mapping attrs)
{
    array(string) rendered = ({ });
    foreach (indices(attrs), string key)
        rendered += ({ " " + key + "=\""+attrs[key]+"\"" });
    // join the individual  key="value"  pieces without a separator
    return sprintf("<%s%s>", name, rendered * "");
}
// State used by the SAX callbacks below.
// NOTE(review): the enclosing class header is not part of this listing;
// parse_rxml() instantiates a class named rxmlHandler - presumably this one.

// result text; reset in startDocumentSAX() and appended to by the callbacks
89 string output = ""; // the output
// tag name -> handler; looked up in startElementSAX()/endElementSAX()
90 mapping rxml_handlers = ([ ]);
// tag name -> attributes saved by startElementSAX() for endElementSAX()
91 mapping rxml_attributes = ([ ]);
// mapping that call_handler() passes to the handler functions
92 mapping variables = ([ ]);
93 string encoding = "utf-8";
// buffered node data, read and written by store_data() and the element
// callbacks
96 ADT.Queue NodeDataQueue = ADT.Queue();
// Constructor (create() is called by Pike when the object is instantiated).
// NOTE(review): the body is not part of this listing; presumably it stores
// vars in `variables` - confirm.
98 void create(mapping vars) {
// Tries to attach data to the node data buffered in NodeDataQueue.
// Reads one entry from the queue; when that entry is a string it is written
// back to the queue. Callers test the return value with `!store_data(...)`.
// NOTE(review): the statement that combines data with node_data and the
// return statements are not part of this listing.
103 int store_data(string data) {
104 string node_data = NodeDataQueue->read();
105 if ( stringp(node_data) ) {
107 NodeDataQueue->write(node_data);
// SAX callback for the start of a document: replaces (does not append to)
// the current output with a marker comment followed by a newline.
115 void startDocumentSAX(object parser, void|mixed userData) {
116 output = "<!-- sTeam link consistency and HTML extension parser - modified document view !-->\n";
// SAX callback for an opening tag.
// @param name   tag name
// @param attrs  attributes of the tag (may be something other than a
//               mapping, see the mappingp() check)
// NOTE(review): several statements (the "script" case, what happens when
// store_data() returns 0, the branch structure) are not part of this listing.
119 void startElementSAX(object parser, string name,
120 mapping(string:string) attrs, void|mixed userData)
122 if ( name == "script" )
// no handler registered for this tag: rebuild the tag text, emitting every
// attribute as  key="value"  without escaping
125 if ( !rxml_handlers[name] ) {
126 string attr_string = "";
127 if ( mappingp(attrs) ) {
128 foreach(indices(attrs), string a) {
130 attr_string += " " + a + "=\""+attrs[a]+"\"";
133 string tagstr = "<"+name+ attr_string + ">";
136 if ( !store_data(tagstr) )
// presumably the branch for tags that do have a handler: remember the
// attributes for endElementSAX() and queue an empty string
140 rxml_attributes[name] = attrs;
141 NodeDataQueue->write(""); // if is empty string then fill
// Calls the tag handler f and yields its result text.
// @param f           handler function; called with one mapping argument
// @param attributes  attributes of the tag; stored as params->args
// @param data        body text of the tag; stored as params->args->body
// NOTE(review): the declaration of result, the statement guarded by the
// mappingp() check, the error test and the return are not in this listing.
147 string call_handler(function f, mapping attributes, string data)
// Pike assigns mappings by reference, so params is the same mapping as
// variables: the args entry set below is stored in variables too, and body
// is written into the attributes mapping that was passed in.
149 mapping params = variables;
150 if ( !mappingp(attributes) )
153 params->args = attributes;
154 params->args->body = data;
// errors thrown by the handler are caught rather than propagated
156 mixed err = catch(result=f(params));
// error path: log the error and substitute an HTML comment for the result
158 FATAL("SAX: error calling handler %s\n%O", err[0], err[1]);
159 result = "<!-- error calling handler -->";
// SAX callback for a closing tag.
// NOTE(review): the declaration of tagstr, the "script" case and the
// statements following the store_data() checks are not part of this listing.
166 void endElementSAX(object parser, string name, void|mixed userData)
170 if ( name == "script" )
172 function hfunc = rxml_handlers[name];
173 mapping attr = rxml_attributes[name];
// tag with a handler: call it with the saved attributes and the node data
// read from the queue; its result takes the place of the element
175 if ( functionp(hfunc) ) {
176 tagstr = call_handler(hfunc, attr, NodeDataQueue->read());
178 if ( !store_data(tagstr) )
// any other tag except "br" (compared case-insensitively) gets a closing tag
181 else if ( lower_case(name) != "br" )
183 tagstr = "</"+name+">";
185 if ( !store_data(tagstr) )
// SAX error callback: appends the parser message to output as an HTML
// comment of the form "<!-- SAX: msg-->" followed by a newline.
190 void errorSAX(object parser, string msg, void|mixed userData) {
191 output += "<!-- SAX: " + msg + "-->\n";
// SAX callback for a CDATA block: passes the value to store_data().
// NOTE(review): the statement executed when store_data() returns 0 is not
// part of this listing.
195 void cdataBlockSAX(object parser, string value, void|mixed userData)
// NOTE(review): as written, both arrays hold the same strings, so this
// replace() leaves value unchanged. The second array presumably held the
// entities for "<" and ">" before this listing was produced - confirm
// against the original file.
198 value = replace(value, ({ "<", ">", }), ({ "<", ">" }));
200 if ( !store_data(value) )
// SAX callback for character data: passes the text to store_data().
// NOTE(review): the statement executed when store_data() returns 0 is not
// part of this listing.
203 void charactersSAX(object parser, string chars, void|mixed userData)
// NOTE(review): as written, both arrays hold the same strings, so this
// replace() leaves chars unchanged. The second array presumably held the
// entities for "<" and ">" before this listing was produced - confirm
// against the original file.
206 chars = replace(chars, ({ "<", ">", }), ({ "<", ">" }));
209 if ( !store_data(chars) )
// SAX callback for a comment: re-emits it directly into output (store_data()
// is not involved), wrapped in <!-- --> and followed by a newline.
212 void commentSAX(object parser, string value, void|mixed userData)
214 output += "<!--"+value+"-->\n";
// SAX callback for a reference: only logs the name via werror().
218 void referenceSAX(object parser, string name, void|mixed userData)
220 werror("referenceSAX(%s)\n", name);
// SAX callback for an entity declaration: only logs the name via werror();
// the remaining arguments are not used in the visible code.
223 void entityDeclSAX(object parser, string name, int type, string publicId,
224 string systemId, string content, void|mixed userData)
226 werror("entityDecl(%s)\n", name);
// SAX callback for a notation declaration: only logs the name via werror().
229 void notationDeclSAX(object parser, string name, string publicId,
230 string systemId, void|mixed userData)
232 werror("notationDecl(%s)\n", name);
// SAX callback for an unparsed entity declaration: only logs the name via
// werror().
// NOTE(review): the end of the parameter list is not part of this listing.
234 void unparsedEntityDeclSAX(object parser, string name, string publicId,
235 string systemId, string notationName,
238 werror("unparsedEntityDecl(%s)\n", name);
// SAX callback asking for an entity: logs the name via werror().
// NOTE(review): declared to return a string, but no return statement is
// visible in this listing.
240 string getEntitySAX(object parser, string name, void|mixed userData)
242 werror("getEntitySax(%s)\n", name);
// SAX callback for an attribute declaration: only logs the element and the
// attribute name via werror().
244 void attributeDeclSAX(object parser, string elem, string fullname,
245 int type, int def, void|mixed userData)
247 werror("attributeDeclSAX(%s, %s)\n", elem, fullname);
// SAX callback for an internal subset.
// NOTE(review): no body statement is visible in this listing.
249 void internalSubsetSAX(object parser, string name, string externalID,
250 string systemID, void|mixed uData)
// SAX callback for ignorable whitespace.
// NOTE(review): no body statement is visible in this listing.
253 void ignorableWhitespaceSAX(object parser, string chars, void|mixed uData)
// Receives the tag handlers; parse_rxml() calls this with its tags mapping.
// NOTE(review): the body is not part of this listing; presumably it fills
// rxml_handlers from h - confirm.
257 void set_handlers(mapping h)
// Derives a tag name from the identifier of a tag object.
// NOTE(review): the return statement is not part of this listing.
267 string get_tag_name(object tag)
269 string name = tag->get_identifier();
// keep only the part of the identifier in front of ".pike"
270 sscanf(name, "%s.pike", name);
// Obtains the handler function of a tag object: the execute function of the
// instance returned by tag->provide_instance().
// NOTE(review): the declaration of instance and the statement taken when no
// instance is available are not part of this listing.
274 function get_tag_function(object tag)
// errors thrown by provide_instance() are swallowed; the objectp() check
// below deals with the case that no instance was obtained
279 catch(instance = tag->provide_instance());
280 if ( !objectp(instance) )
283 return instance->execute;
// Runs html through the SAX based HTML parser with an rxmlHandler as
// callback object, so that tags with an entry in `tags` are replaced by the
// output of their handler functions.
// @param html       the document, either as a string or as an object
// @param variables  passed to the rxmlHandler constructor
// @param tags       passed to the handler's set_handlers()
// @param encoding   optional encoding name, used when html is a string
// NOTE(review): the declaration of inp, several statements and closing
// branches, and the final return are not part of this listing.
288 string parse_rxml(string|object html, mapping variables, mapping tags, string|void encoding)
290 object cb = rxmlHandler(variables);
293 cb->set_handlers(tags);
// for objects the encoding comes from the DOC_ENCODING attribute and
// overrides the argument; for strings without an encoding argument it is
// detected from the content
294 if ( objectp(html) ) {
295 encoding = html->query_attribute(DOC_ENCODING);
297 else if ( !stringp(encoding) )
298 encoding = detect_encoding(html);
300 encoding = lower_case(encoding);
// special case: empty input string
303 if ( stringp(inp) && strlen(inp) == 0 )
306 object sax = xml.HTML(inp, cb, ([ ]), 0, stringp(html));
307 sax->parse(encoding);
308 string res = cb->get_result();
310 // now it IS utf8 - change back to former encoding
311 if ( stringp(encoding) && encoding != "utf-8" ) {
// first attempt: convert with xml.utf8_to_html(); if that throws, try
// xml.utf8_to_isolat1() for iso-8859-1 documents
312 if ( catch(res = xml.utf8_to_html(res)) ) {
313 werror("HTML Conversion failed !\n");
314 if ( encoding == "iso-8859-1" ) {
315 if ( catch(res = xml.utf8_to_isolat1(res)) ) {
316 werror("Failed conversion - skipping rxml !\n");
// conversion not possible: hand back the unmodified input
321 werror("Failed conversion - skipping !\n");
322 return html; // do nothing
// Test handler: greets the value of the tag's name attribute.
// NOTE(review): the enclosing class header is not part of this listing;
// judging by the self-test further down this is presumably testTag.
// @param vars  handler parameters; vars->args->name is read
333 string execute(mapping vars) {
334 return "Hello World to " + vars->args->name;
// Test handler: wraps the tag body in <BODY>...</BODY>.
// NOTE(review): the enclosing class header is not part of this listing;
// judging by the self-test further down this is presumably tagTag.
// @param vars  handler parameters; vars->args->body is read
340 string execute(mapping vars) {
341 return "<BODY>"+vars->args->body+"</BODY>";
// Self-test of parse_rxml().
// NOTE(review): the function header, the variable declaration and parts of
// the comparisons and of the second input are not part of this listing.
348 "<html><body>Welcome! <h2><test name='test'/></h2></body></html>";
// case 1: a single <test> tag handled by testTag
350 result = parse_rxml(result, ([ ]), ([ "test": testTag()->execute, ]));
352 "<html><body>Welcome! <h2>Hello World to test</h2></body></html>" )
353 error("rxml test failed - wrong result " + result);
// case 2: nested tags, <d> handled by testTag and <c> handled by tagTag
355 result = "<a><b><c><c apply='1'>"+
357 "<d name='y'/></b></a>";
359 result = parse_rxml(result, ([ ]), ([ "d": testTag()->execute,
360 "c":tagTag()->execute,]));
362 "<a><b><c><BODY>Hello World to x</BODY>Hello World to y</b></a>" )
363 error("nested rxml test failed !");
// Looks up the handler function of the tag called name: the object
// name + ".pike" inside the object OBJ("/tags") is passed to
// get_tag_function().
// NOTE(review): the statement taken when "/tags" is missing and any check
// on the tag object that was found are not part of this listing.
366 function find_tag(string name)
368 object tags = OBJ("/tags");
369 if ( !objectp(tags) )
371 object tag = tags->get_object_byname(name+".pike");
374 return get_tag_function(tag);
// Collects the tag handlers that belong to obj.
// NOTE(review): the statements that fill and return the result mapping and
// the condition in front of the FATAL() call are not part of this listing.
378 mapping find_tags(object obj)
// container: inspect every CLASS_DOCLPC object in its inventory
382 if ( obj->get_object_class() & CLASS_CONTAINER ) {
383 mapping result = ([ ]);
384 foreach(obj->get_inventory_by_class(CLASS_DOCLPC), object tag) {
385 function f = get_tag_function(tag);
386 string tagname = get_tag_name(tag);
388 FATAL("Warning - no tag function for tag: %s", tagname);
// XSL document: use the object named "tags" in the document's environment
394 else if ( obj->get_object_class() & CLASS_DOCXSL) {
395 object env = obj->get_environment();
397 return find_tags(env->get_object_byname("tags"));
/**
 * Replace the XML entities &lt; &gt; &amp;
 * with the plain characters < > &.
 *
 * replace() substitutes all patterns in a single pass, so text produced by
 * one substitution is never substituted again ("&amp;lt;" becomes "&lt;").
 *
 * @param str the string to replace
 * @return a string without quoted characters
 */
string unquote_xml ( string str )
{
    // the patterns must be the entity strings; mapping the plain characters
    // onto themselves would leave str unchanged
    return replace( str, ({ "&lt;", "&gt;", "&amp;" }), ({ "<", ">", "&" }) );
}
/**
 * Replace the problematic characters < > &
 * with the XML entities &lt; &gt; &amp;.
 *
 * replace() substitutes all patterns in a single pass, so the "&" of an
 * entity that was just inserted is not quoted a second time.
 *
 * @param str the string to replace
 * @return a string with problematic characters quoted
 */
string quote_xml ( string str )
{
    // the replacements must be the entity strings; mapping the characters
    // onto themselves would leave str unchanged
    return replace( str, ({ "<", ">", "&" }), ({ "&lt;", "&gt;", "&amp;" }) );
}
430 * Replace HTML entities with umlauts, <, >, & etc.
431 * This method was taken from Pike Protocols.HTTP.unentity() and reversed.
433 * @param str the string to replace
434 * @return a utf-8 string without quoted characters
// Replaces the strings of the first array below with the strings at the
// same position of the second array.
// NOTE(review): the statement that applies the two arrays to str (presumably
// a replace() call) and its end are not part of this listing.
// NOTE(review): the tables look damaged. The first array is supposed to hold
// HTML entities, yet every literal in it is a plain character, and the
// literals written as """ are not valid string syntax. The second array
// contains many "?" placeholders. This looks like entity decoding plus a
// character set conversion applied to the listing - confirm against the
// original file before relying on these tables.
436 string unquote_html ( string str )
// first array: the patterns to search for
440 ({ "Æ", "Á", "Â", "À", "Å", "Ã",
441 "Ä", "Ç", "Ð", "É", "Ê", "È",
442 "Ë", "Í", "Î", "Ì", "Ï", "Ñ",
443 "Ó", "Ô", "Ò", "Ø", "Õ", "Ö",
444 "Þ", "Ú", "Û", "Ù", "Ü", "Ý",
445 "á", "â", "æ", "à", "'", "å",
446 "*", "ã", "ä", "¦", "ç", "¢",
447 ":", ",", "@", "©", "°", "$",
448 "é", "ê", "è", " ", " ", "=",
449 "ð", "ë", "!", "½", "¼", "¾",
450 "⅛", "⅜", "⅝", "⅞", ">", ">",
451 "½", "‐", "í", "î", "¡", "ì",
452 "¿", "ï", "«", "(", "[", "<",
453 "<", "—", "µ", "·", " ", "–",
454 "¬", "ñ", "ó", "ô", "ò", "ø",
455 "õ", "ö", "¶", "%", ".", "+",
456 "±", "£", "?", """, "»", "®",
457 ")", "]", "§", ";", "­", "¹",
458 "²", "³", "ß", "þ", "˜", "™",
459 "ú", "û", "ù", "ü", "ý", "¥",
460 "ÿ", "|", "&", """, "'", "�", ":" }),
// second array: the replacement for the pattern at the same index
462 ({ "?", "¡", "¬", "¿", "?", "?",
463 "?", "«", "?", "?", " ", "»",
464 "À", "Õ", "?", "Ã", "?", "?",
465 "?", "?", "?", "ÿ", "?", "÷",
466 "?", "?", "?", "?", "?", "?",
467 "·", "?", "Ê", "?", "'", "Â",
468 "*", "?", "?", "¶", "Á", "¢",
469 ":", ",", "@", "©", "?", "$",
470 "È", "Í", "Ë", " ", " ", "=",
471 "?", "Î", "!", "?", "º", "æ",
472 "⅛", "⅜", "⅝", "⅞", ">", ">",
473 "½", "‐", "Ì", "Ó", "°", "Ï",
474 "ø", "Ô", "´", "(", "[", "<",
475 "<", "—", "µ", "?", "", "–",
476 "¨", "Ò", "Û", "Ù", "Ú", "¯",
477 "?", "?", "?", "%", ".", "+",
478 "±", "£", "?", "\"", "ª", "Æ",
479 ")", "]", "ß", ";", "?", "?",
480 "?", "?", "?", "?", "~", "™",
481 "?", "?", "?", "¸", "?", "?",
482 "?", "|", "&", "\"", "\'", "\000", ":" }),
489 * Replace umlauts, <, >, & etc. with HTML entities.
490 * This method was taken from Pike Protocols.HTTP.unentity() and reversed.
492 * @param str the string to replace (utf-8 encoding expected)
493 * @return a string with problematic characters quoted to html entities
// Replaces the strings of the first array below with the strings at the
// same position of the second array (the two tables of unquote_html() in
// swapped order).
// NOTE(review): the statement that applies the two arrays to str (presumably
// a replace() call) and its end are not part of this listing.
// NOTE(review): the tables look damaged. The second array is supposed to
// hold HTML entities, yet every literal in it is a plain character, and the
// literals written as """ are not valid string syntax. The first array
// contains many "?" entries and one empty string as search patterns. This
// looks like entity decoding plus a character set conversion applied to the
// listing - confirm against the original file before relying on these tables.
495 string quote_html ( string str )
// first array: the patterns to search for
499 ({ "?", "¡", "¬", "¿", "?", "?",
500 "?", "«", "?", "?", " ", "»",
501 "À", "Õ", "?", "Ã", "?", "?",
502 "?", "?", "?", "ÿ", "?", "÷",
503 "?", "?", "?", "?", "?", "?",
504 "·", "?", "Ê", "?", "'", "Â",
505 "*", "?", "?", "¶", "Á", "¢",
506 ":", ",", "@", "©", "?", "$",
507 "È", "Í", "Ë", " ", " ", "=",
508 "?", "Î", "!", "?", "º", "æ",
509 "⅛", "⅜", "⅝", "⅞", ">", ">",
510 "½", "‐", "Ì", "Ó", "°", "Ï",
511 "ø", "Ô", "´", "(", "[", "<",
512 "<", "—", "µ", "?", "", "–",
513 "¨", "Ò", "Û", "Ù", "Ú", "¯",
514 "?", "?", "?", "%", ".", "+",
515 "±", "£", "?", "\"", "ª", "Æ",
516 ")", "]", "ß", ";", "?", "?",
517 "?", "?", "?", "?", "~", "™",
518 "?", "?", "?", "¸", "?", "?",
519 "?", "|", "&", "\"", "\'", "\000", ":" }),
// second array: the replacement for the pattern at the same index
521 ({ "Æ", "Á", "Â", "À", "Å", "Ã",
522 "Ä", "Ç", "Ð", "É", "Ê", "È",
523 "Ë", "Í", "Î", "Ì", "Ï", "Ñ",
524 "Ó", "Ô", "Ò", "Ø", "Õ", "Ö",
525 "Þ", "Ú", "Û", "Ù", "Ü", "Ý",
526 "á", "â", "æ", "à", "'", "å",
527 "*", "ã", "ä", "¦", "ç", "¢",
528 ":", ",", "@", "©", "°", "$",
529 "é", "ê", "è", " ", " ", "=",
530 "ð", "ë", "!", "½", "¼", "¾",
531 "⅛", "⅜", "⅝", "⅞", ">", ">",
532 "½", "‐", "í", "î", "¡", "ì",
533 "¿", "ï", "«", "(", "[", "<",
534 "<", "—", "µ", "·", " ", "–",
535 "¬", "ñ", "ó", "ô", "ò", "ø",
536 "õ", "ö", "¶", "%", ".", "+",
537 "±", "£", "?", """, "»", "®",
538 ")", "]", "§", ";", "­", "¹",
539 "²", "³", "ß", "þ", "˜", "™",
540 "ú", "û", "ù", "ü", "ý", "¥",
541 "ÿ", "|", "&", """, "'", "�", ":" }),
/**
 * Identify this module.
 *
 * @return the constant string "htmllib"
 */
string describe()
{
    return "htmllib";
}