DocHTML._pike
Go to the documentation of this file.
1 /* Copyright (C) 2000-2004 Thomas Bopp, Thorsten Hampel, Ludger Merkens
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License as published by
5  * the Free Software Foundation; either version 2 of the License, or
6  * (at your option) any later version.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16  *
17  * $Id: DocHTML.pike,v 1.2 2009/08/07 16:14:56 nicke Exp $
18  */
19 inherit "/classes/Document";
20 #include <macros.h>
21 #include <classes.h>
22 #include <assert.h>
23 #include <database.h>
24 #include <exception.h>
25 #include <attributes.h>
26 #include <events.h>
27 //! This document type holds html data and handles link consistency.
28 class DocHTML : public Document{
29 public:
30 
31 
32 
33 
34 
35 
// Link-exchange callback: receive_content() assigns exchange_ref to it and
// exchange_links() calls it for every src/href/background value.
36 private function fExchange;
// Size override read by get_content_size() (used when > 0); no assignment
// to it is visible in this listing.
37 private int __size;
// Result of _FILEPATH->object_to_path(this_object()), set in receive_content().
38 private string sFilePosition;
// Parser.HTML instance created per upload in receive_content() and fed by
// UploadHTMLParser->save_chunk().
39 private object oParser;
// Link bookkeeping: object keys map to ([ position : count ]) (see
// add_local_link), string keys map to a plain count (see add_extern_link).
40  mapping mLinks;
41 
// States of the attribute tokenizer in exchange_links(): outside / inside
// a quoted attribute value.
42 #define MODE_NORMAL 0
43 #define MODE_STRING 1
44 
45 /**
46  * Initialize the document: start with an empty link mapping and register
47  * store_links/restore_links as the save and restore callbacks of the
    * STORE_HTMLLINK data storage.
48  */
49 private:
50  void init_document()
51 {
52  mLinks = ([ ]);
53  add_data_storage(STORE_HTMLLINK, store_links, restore_links);
54 }
55 
56 public:
57 
58 
59 /**
60  * Quote-tag callback registered for "!--" in receive_content().
61  * Wraps the received text in the comment delimiters again.
62  * @param Parser.HTML p - parser context (not used).
63  * @param string tag - the text handed over by the parser for the quote tag.
64  * @return one-element array holding "<!--" + tag + "-->".
65  */
66 protected:
67  mixed quote(Parser.HTML p, string tag) {
68  return ({ "<!--"+tag+"-->" });
69 }
70 
71 public:
72 /**
73  * A script tag was found while parsing. Registered in receive_content()
74  * as quote-tag callback for both "SCRIPT" and "script".
    * The text is logged and returned wrapped as "<SCRIPT " ... "SCRIPT>",
    * i.e. the wrapper is upper case regardless of which spelling matched.
    *
75  * @param Parser.HTML p - the parser context (not used).
    * @param string tag - the text handed over by the parser for the quote tag.
76  * @return one-element array holding the re-wrapped script text.
77  */
78 protected:
79  mixed script(Parser.HTML p, string tag) {
80  LOG("Script Tag!!!\n"+tag+"\nEND\n");
81  return ({ "<SCRIPT "+tag+"SCRIPT>" });
82 }
83 
84 public:
85 
86 /**
87  * Main function for link exchange. Called every time a potential
88  * link tag was parsed.
89  *
    * The raw tag text is split at whitespace outside of quoted values, the
    * first token is taken as tag name and every following name=value token
    * with a non-empty value is collected. Values of src/href/background are
    * passed to fExchange: a positive integer result is stored as an
    * additional "oid" attribute, a string result replaces the value. A
    * "content" attribute containing "charset=" sets DOC_ENCODING.
    *
    * Only tags with at least one src/href/background attribute are rebuilt;
    * the rebuilt tag contains just the collected name=value attributes,
    * re-quoted with double quotes, so tokens without '=' or with an empty
    * value are not carried over. All other tags are returned unchanged.
    *
90  * @param Parser.HTML p - the parser context.
91  * @param string tag - the tag found.
92  * @return one-element array with the (possibly rebuilt) tag.
93  */
94 protected:
95  mixed exchange_links(Parser.HTML p, string tag) {
96  array attr;
97  mapping attributes;
98  string attribute;
99  bool link = false;
100  string tname;
101  int mode, i, l, qchar;
102 
103  attributes = ([ ]);
104 
105  // MESSAGE("TAG:"+tag);
106 
107  l = strlen(tag);
108  mode = MODE_NORMAL;
109  i = 1;
110  tname = "";
111  int start = 1;
112 
113  attr = ({ });
114  while ( i < l ) {
115  if ( tag[i] == '"' || tag[i] == '\'' ) {
     // A quote opens a value in normal mode; inside a value only the
     // quote character that opened it closes it again, so a value like
     // title="it's" stays one token.
     if ( mode == MODE_NORMAL ) {
     mode = MODE_STRING;
     qchar = tag[i];
     }
     else if ( tag[i] == qchar )
     mode = MODE_NORMAL;
     }
117  else if ( (tag[i] == ' ' || tag[i] == '\t' || tag[i]=='\n') &&
118  mode == MODE_NORMAL )
119  {
120  attr += ({ tag[start..i-1] });
121  start = i+1;
122  }
123  i++;
124  }
125 
     // add the last token, leaving out the closing ">" or "/>"
126  if ( tag[l-2] == '/' ) {
127  if ( start < l-3 )
128  attr += ({ tag[start..l-3] });
129  }
130  else if ( start <= l-2 ) {
131  attr += ({ tag[start..l-2] });
132  }
133 
134  if ( arrayp(attr) && sizeof(attr) > 0 ) {
135  string a, b;
136  int p;
137 
138  tname = attr[0];
139  for ( int i = 1; i < sizeof(attr); i++ ) {
140  if ( (p = search(attr[i], "=")) > 0 ) {
141  a = attr[i][..p-1];
142  b = attr[i][p+1..];
143  if ( strlen(b) > 0 ) {
     // strip the surrounding quotes of a quoted value
144  if ( b[0] == '"' || b[0] == '\'' )
145  b = b[1..strlen(b)-2];
146  attributes[a] = b;
147  }
148  }
149  }
150  }
151  attr = indices(attributes);
152  foreach(attr, attribute) {
153  if ( lower_case(attribute) == "src" ||
154  lower_case(attribute) == "href" ||
155  lower_case(attribute) == "background" )
156  {
157  mixed err = catch {
158  mixed res = fExchange(attributes[attribute]);
159  if ( intp(res) && res > 0 ) {
160  attributes["oid"] = (string)res;
     // list "oid" only once, even if the tag already had an oid
     // attribute or contains several link attributes
     if ( search(attr, "oid") == -1 )
161  attr += ({ "oid" });
162  }
163  else if ( stringp(res) )
164  attributes[attribute] = res;
165  };
166  if ( err != 0 )
167  FATAL("Error exchange links: %O\n%O", err[0], err[1]);
168  link = true;
169  }
170  else if ( lower_case(attribute) == "content" ) {
171  string ctype;
172  if ( sscanf(attributes[attribute], "%*scharset=%s", ctype) )
173  do_set_attribute(DOC_ENCODING, lower_case(ctype));
174  }
175  }
176 
177 
178  string result;
179 
180 
181  if ( link ) {
182  result = "<"+tname;
183  foreach(attr, attribute) {
184  result += " " + attribute + "=\""+attributes[attribute] + "\"";
185  }
186  if ( search(tag, "/>") > -1 )
187  result += "/>";
188  else
189  result += ">";
190  //werror("Exchanged Tag: " + result+"\n");
191  }
192  else
193  result = tag;
194 
195  return ({ result }); // nothing to be done
196 }
197 
198 public:
199 
// Upload sink that runs every received chunk through the enclosing
// document's oParser before handing the chunk on to the wrapped content
// handler. The chunk is forwarded as received; the parser's output is not
// read here.
200 class UploadHTMLParser {
201 public:
     // handler whose save_chunk() receives the data
202  object oContentHandle;
203  void create(object ContentHandle) {
204  oContentHandle = ContentHandle;
205  }
206 
207  /**
208  * Callback function to save a chunk of data received by the server.
     * A non-string chunk ends the upload: the parser is finished and
     * destructed and the content handler is called with 0.
209  *
210  * @param string chunk - the received chunk.
211  */
212  void save_chunk(string chunk) {
213  mixed err;
214  if ( objectp(oParser) ) {
215  if ( !stringp(chunk) ) {
     // end of upload: parse errors are reported but do not abort
216  err = catch(oParser->finish());
217  if ( err != 0 )
218  FATAL("Parsing HTML failed: %O:%O\n", err[0], err[1]);
219 
220  destruct(oParser);
221  oContentHandle->save_chunk(0);
222  return;
223  }
224  else {
225  err = catch(oParser->feed(chunk, 1));
226  if ( err != 0 )
227  FATAL("Parsing HTML failed: %O:%O\n", err[0], err[1]);
228  }
229  }
230 
     // reached for string chunks, and for the final call when no parser
     // object exists
231  if ( stringp(chunk) ) {
232  oContentHandle->save_chunk(chunk);
233  }
234  else
235  oContentHandle->save_chunk(0);
236  }
237 
238 }
239 
240 /**
241  * Function to start an upload. Returns the save_chunk function.
242  *
     * Runs the EVENT_UPLOAD event, sets up a fresh HTML parser with
     * exchange_links as tag callback, resets the stored links, archives the
     * current content as an old version (if a version is already set) and
     * updates the version and modification attributes.
     *
243  * @param int content_size the size of the content.
244  * @return upload function (save_chunk of a new UploadHTMLParser).
245  */
246 function receive_content(int content_size)
247 {
248  object obj = CALLER;
     // use the user object instead of the caller if the caller provides one
249  if ( (obj->get_object_class() & CLASS_USER) &&
250  (functionp(obj->get_user_object) ) &&
251  objectp(obj->get_user_object()) )
252  obj = obj->get_user_object();
253 
254  try_event(EVENT_UPLOAD, obj, content_size);
255 
256  sFilePosition = _FILEPATH->object_to_path(this_object());
257  oParser = Parser.HTML();
258  oParser->_set_tag_callback(exchange_links);
259  oParser->add_quote_tag("!--", quote, "--");
260  oParser->add_quote_tag("SCRIPT", script, "SCRIPT");
261  oParser->add_quote_tag("script", script, "script");
262  fExchange = exchange_ref;
263  reset_links();
264 
265  // duplicate object with old content id
266  int version = do_query_attribute(DOC_VERSION);
267  if ( !version )
268  version = 1;
269  else {
270  seteuid(get_creator());
271  object oldversion = duplicate( ([ "content_id": get_content_id(), ]));
272  mapping versions = do_query_attribute(DOC_VERSIONS);
     // NOTE(review): the copy is taken before the mappingp() check below,
     // so the old version may receive a non-mapping value - confirm intent
273  oldversion->set_attribute(DOC_VERSIONS, copy_value(versions));
274  if ( !mappingp(versions) )
275  versions = ([ ]);
276  versions[version] = oldversion;
277 
     // the archived copy keeps the timestamps and modifier of the old content
278  oldversion->set_attribute(DOC_LAST_MODIFIED, do_query_attribute(DOC_LAST_MODIFIED));
279  oldversion->set_attribute(DOC_USER_MODIFIED, do_query_attribute(DOC_USER_MODIFIED));
280  oldversion->set_attribute(OBJ_CREATION_TIME, do_query_attribute(OBJ_CREATION_TIME));
281 
282  version++;
283  do_set_attribute(DOC_VERSIONS, versions);
284  }
285  do_set_attribute(DOC_VERSION, version);
286 
287  do_set_attribute(DOC_LAST_MODIFIED, time());
288  do_set_attribute(DOC_USER_MODIFIED, this_user());
289 
290  object oContentHandler = get_upload_handler(content_size);
291  object oUploadHTMLParser = UploadHTMLParser(oContentHandler);
292  return oUploadHTMLParser->save_chunk;
293 }
294 
295 /**
296  * Create a path inside steam which is a sequence of containers.
     * Starting at _ROOTROOM, every non-empty path component except the last
     * one is resolved; components that do not resolve to an object are
     * created through the CLASS_CONTAINER factory and moved into the
     * current container.
297  *
298  * @param string p - the path to create.
299  * @return the container created or found last; the environment of this
     *         document if p is empty.
300  */
301 protected:
302  object create_path(string p)
303 {
304  //MESSAGE("create_path("+p+")");
305  if ( strlen(p) == 0 )
306  return get_environment();
307 
308  array tokens = p / "/";
309  object cont = _ROOTROOM;
310  object factory = _Server->get_factory(CLASS_CONTAINER);
311 
     // the last token is skipped - presumably it names the document itself
     // (TODO confirm against callers)
312  for ( int i = 0; i < sizeof(tokens)-1; i++) {
313  object obj;
314  if ( tokens[i] == "" )
315  continue;
316  obj = _FILEPATH->resolve_path(cont, tokens[i]);
317  if ( !objectp(obj) ) {
318  obj = factory->execute((["name":tokens[i],]));
319  obj->move(cont);
320  }
321  //else MESSAGE("Found path in cont: " + tokens[i]);
322  cont = obj;
323  }
324  //MESSAGE("Found:" + cont->get_identifier());
325  return cont;
326 }
327 
328 public:
329 
/**
 * Classify a link found in the document and record it.
 *
 * Links containing "get.pike" or "navigate.pike" and javascript: links are
 * ignored. Links of the form type://rest and mailto: links are recorded
 * as external links. Everything else is resolved relative to the
 * environment of this document; if an object is found there, it is
 * recorded as local link.
 *
 * @param string link - the attribute value of the link.
 * @return the object id of the local link target, 0 in all other cases.
 */
330 int exchange_ref(string link)
331 {
332  object obj;
333  string linkstr, position, type;
334 
335  if ( !objectp(get_environment()) )
336  return 0;
337 
338  link = replace(link, "\\", "/");
339  if ( search(link, "get.pike") >= 0 || search(link, "navigate.pike") >= 0 )
340  return 0;
341  if ( sscanf(link, "%s://%s", type, linkstr) == 2 ) {
342  add_extern_link(linkstr, type);
343  return 0;
344  }
345  if ( sscanf(link, "mailto:%s", linkstr) == 1 )
346  {
347  add_extern_link(linkstr, "mailto");
348  return 0;
349  }
350  if ( sscanf(lower_case(link), "javascript:%s", linkstr) == 1 )
351  return 0;
     // split off the anchor, only the path part is resolved
352  if ( sscanf(link, "%s#%s", linkstr, position) == 2 ) {
353  link = linkstr;
354  }
355 
     // An empty path (empty link or "#anchor" only) refers to this document
     // itself; without this check it would resolve to the environment
     // container, which would then be recorded as link target.
356  if ( strlen(link) == 0 || link == get_identifier() ) {
357  return 0;
358  }
359  link = combine_path(_FILEPATH->object_to_filename(get_environment()),
360  link);
     // lookup errors are ignored on purpose: obj stays 0 and no link is stored
361  mixed err = catch {
362  obj = _FILEPATH->path_to_object(link);
363  };
364  if ( !objectp(obj) )
365  return 0;
366 
367  add_local_link(obj, type, position, link);
368  return obj->get_object_id();
369 }
370 
/**
 * Look up a link in the OBJ_LINKS attribute, which add_local_link() fills
 * with resolved path -> target object.
 *
 * @param string href - the key to look up.
 * @return the stored entry for href, 0 if the attribute is no mapping.
 */
371 object get_link(string href)
372 {
373  mapping links = do_query_attribute(OBJ_LINKS);
374  if ( mappingp(links) )
375  return links[href];
376  return 0;
377 }
378 
379 
380 
381 /**
382  * Return mapping with save data used by _Database. Registered as save
     * callback for STORE_HTMLLINK in init_document(). Throws E_ACCESS if the
     * caller is not _Database.
383  *
384  * @return mapping with the key "Links" holding the link mapping.
385  */
386 mixed
387 private:
388 store_links()
389 {
390  if ( CALLER != _Database )
391  THROW("Caller is not Database !", E_ACCESS);
392  return ([ "Links": mLinks, ]);
393 }
394 
395 public:
396 
397 /**
398  * Restore the saved link data. This is called by database and
399  * sets the Links mapping again. Throws E_ACCESS if the caller is not
     * _Database.
400  *
401  * @param mixed data - saved data as returned by store_links(); the
     *        "Links" entry is taken over without a copy.
402  */
403 private:
404 void restore_links(mixed data)
405 {
406  if (CALLER != _Database ) THROW("Caller is not Database !", E_ACCESS);
407  mLinks = data["Links"];
408 }
409 
410 public:
411 
412 /**
413  * Add a local link: count it in mLinks per target object and position,
414  * store link -> o in the OBJ_LINKS attribute and request saving of
     * STORE_HTMLLINK. Links to this document itself are ignored.
     *
415  * @param object o - the link target (exchange_ref passes the object
     *        resolved from the link path).
416  * @param string type - the type of reference (not used here).
417  * @param string position - where the link points (the anchor, if any).
     * @param string link - the key under which o is stored in OBJ_LINKS.
418  */
419 protected:
420  void add_local_link(object o, string type, string position, string link)
421 {
422  if ( o->get_object_id() == get_object_id() )
423  return; // no links to ourself!
424  if ( !mappingp(mLinks[o]) )
425  mLinks[o] = ([ position: 1 ]);
426  else {
427  if ( zero_type(mLinks[o][position]) )
428  mLinks[o][position] = 1;
429  else
430  mLinks[o][position]++;
431  }
432  mapping links = do_query_attribute(OBJ_LINKS);
433  if ( !mappingp(links) )
434  links = ([ ]);
435  links[link] = o;
436  do_set_attribute(OBJ_LINKS, links);
437 
438  require_save(STORE_HTMLLINK);
439 }
440 
441 public:
442 
443 /**
444  * Get an array of links pointing to local(steam) objects, i.e. all keys
     * of mLinks that are objects (string keys are external links).
445  *
446  * @return array of link objects.
447  */
448 array get_local_links()
449 {
450  array result = ({ });
451  array index = indices(mLinks);
452 
453  foreach(index, mixed idx) {
454  if ( objectp(idx) )
455  result += ({ idx });
456  }
457  return result;
458 }
459 
460 /**
461  * Add an extern link to some URL: increase the counter stored under the
     * url in mLinks and request saving of STORE_HTMLLINK.
462  *
463  * @param string url - the url to point to.
464  * @param string type - the type of the link (not used here).
465  */
466 protected:
467  void add_extern_link(string url, string type)
468 {
469  if ( zero_type(mLinks[url]) )
470  mLinks[url] = 1;
471  else
472  mLinks[url]++;
473  require_save(STORE_HTMLLINK);
474 
475 }
476 
477 public:
478 
479 /**
480  * an object was deleted and so the link to this object is outdated !
     * Runs the EVENT_REF_GONE event with the link and the creator of this
     * document.
481  *
482  */
483 void removed_link()
484 {
485  object creator = get_creator();
     // NOTE(review): `link` is not declared in this function or among the
     // visible members - its declaration is presumably missing from this
     // listing; verify against the original source.
486  run_event(EVENT_REF_GONE, link, creator);
487 }
488 
489 
490 /**
491  * Reset all saved link data: mLinks is replaced by an empty mapping.
492  *
493  */
494 protected:
495  void reset_links()
496 {
497  // first remove all references on other objects
     // NOTE(review): the body of the if below is empty in this listing, so
     // the loop only calls status() on object keys - confirm whether a
     // statement was lost.
498  if ( mappingp(mLinks) ) {
499  foreach(indices(mLinks), mixed index) {
500  if ( objectp(index) && index->status() >= 0 ) {
501  }
502  }
503  }
504  mLinks = ([ ]);
505 }
506 
507 public:
508 
509 /**
510  * Get a copy of the Links mapping (made with copy_value, so changes to
     * the result do not affect the stored mapping).
511  *
512  * @return copied link mapping.
513  */
514 mapping get_links()
515 {
516  return copy_value(mLinks);
517 }
518 
519 
520 /**
521  * Get the object class: the class bits of the inherited program with
     * CLASS_DOCHTML added.
522  *
523  * @return the object class.
524  */
525 int
526 get_object_class()
527 {
528  return ::get_object_class() | CLASS_DOCHTML;
529 }
530 
// Name of this class as string, always "DocHTML".
531 string get_class() { return "DocHTML"; }
532 
533 /**
534  * Get the size of the content which is the size of the document
535  * with exchanged links. __size is returned if it is positive, otherwise
     * the content size of the inherited program.
536  *
537  * @return the content size.
538  */
539 int get_content_size()
540 {
541  return (__size > 0 ? __size : ::get_content_size());
542 }
543 
544 
545 {
546  // TODO: add functions for testing create_path() and the link exchange
547 }
548 
549 
550 };