fulltext._pike
Go to the documentation of this file.
1 /*
2  * create temporary table ft_id (ob_id int primary key, count int);
3  * insert into ft_id select ob_id, 0 from ob_data where ob_attr='DOC_MIME_TYPE' and ob_data='"text/html"';
4  * replace into ft_id select ob_id, 1 from doc_ft;
5  * delete from ft_id where count = 1;
6  */
7 class fulltext {
8 public:
9 
10 protected:
/**
 * Queue every document that should be in the fulltext index but is not.
 *
 * Opens its own database connection, collects in a temporary table the ids
 * of all objects whose DOC_MIME_TYPE attribute is one of the mime types in
 * mStripFilter, discards the ids that already have a row in doc_ft, and
 * writes a Doc for each remaining object that connection->find_object()
 * can resolve to oQueue.
 */
void check_ft_integrity()
{
  array(string) mimes = indices(mStripFilter);
  if (!sizeof(mimes))
    return;  // no mime type is handled, so no document can be missing

  Sql.Sql handle = Sql.Sql(serverCfg["database"]);

  // ob_data stores the mime type wrapped in double quotes ("text/html"),
  // so the double quotes are part of the value being matched.  The mime
  // strings come from the configuration, hence they are quoted for SQL.
  array(string) literals = map(mimes, lambda(string mime) {
      return "'" + handle->quote("\"" + mime + "\"") + "'";
    });

  handle->query("create temporary table ft_id "+
                "(ob_id int primary key, count int)");
  // A single IN (...) list keeps every alternative tied to the
  // DOC_MIME_TYPE attribute; a bare "a and b or c" chain would match
  // rows of any attribute for all but the first mime type.
  handle->query("insert into ft_id select distinct "+
                "ob_id,0 from ob_data where ob_attr='DOC_MIME_TYPE' "+
                "and ob_data in (" + literals * "," + ")");
  // Rows that exist in doc_ft overwrite the count with 1 and are removed,
  // leaving only the ids without a fulltext entry.
  handle->query("replace into ft_id select distinct ob_id, 1 from doc_ft");
  handle->query("delete from ft_id where count =1");
  array missing = handle->query("select distinct ob_id from ft_id");
  handle->query("drop table ft_id");

  foreach (missing, mixed row)
  {
    object o = connection->find_object((int)row["ob_id"]);
    if (objectp(o))
      oQueue->write(Doc(o));
  }
}
31 
32 public:
33 
34 private:
/**
 * Signal handler that ends the process by calling _exit(1).
 *
 * @param sig  number of the signal that was received (not used)
 */
private void got_kill(int sig) { _exit(1); }
39 
40 public:
41 
/**
 * Entry point of the fulltext service.
 *
 * Initializes the service, loads the strip filter configuration (falling
 * back to a built-in html2text filter when the file cannot be read),
 * drops filters whose executable cannot be started, installs the QUIT
 * signal handler, runs the startup sequence and launches the worker
 * threads.
 *
 * @param argc  number of command line arguments
 * @param argv  command line arguments; "--eid=" + EVENT_UPLOAD is appended
 *              before they are handed to init()
 * @return always -17
 */
int main(int argc, array argv)
{
  init( "fulltext", argv + ({ "--eid="+EVENT_UPLOAD }) );

  mixed cfg_err = catch {
    mStripFilter = read_config_file(CONFIG_DIR+"/services/fulltext.cfg");
  };
  if ( cfg_err ) {
    // Report the problem instead of silently switching to the defaults.
    werror( "[%s] Could not read %s, using default strip filters:\n%s\n",
            timelib.log_time(), CONFIG_DIR+"/services/fulltext.cfg",
            master()->describe_backtrace(cfg_err) );
    mStripFilter = ([ "text/html" : "html2text" ]);
  }

  // An empty executable means the mime type is handled without a filter.
  if ( !mStripFilter["text/plain"])
    mStripFilter["text/plain"] = "";

  // check strip filters and remove any that don't work:
  foreach ( indices(mStripFilter), string mime ) {
    mixed executable = mStripFilter[ mime ];
    if ( !stringp(mime) || !stringp(executable) || executable == "" )
      continue;
    mixed filter_err = catch {
      Process.create_process( ({ executable }), ([ ]) )->wait();
    };
    if ( filter_err ) {
      werror( "[%s] Executable for %s strip filter not working: %s (Error: %s)\n",
              timelib.log_time(), mime, executable, filter_err[0]-"\n" );
      m_delete( mStripFilter, mime );
    }
  }

  signal(signum("QUIT"), got_kill);

  mixed start_err = catch{
    create_table(serverCfg["database"]);
    start();
    persistence_module = send_cmd( 0, "get_module", "persistence" );
    content_in_database = send_cmd( persistence_module, "get_store_content_in_database", 0 );
    content_in_filesystem = send_cmd( persistence_module, "get_store_content_in_filesystem", 0 );
  };

  // Report a failed startup before the worker threads begin, so the root
  // cause appears in the log ahead of any follow-up errors they produce.
  if (start_err)
    werror("Startup of fulltext service failed.\n"+
           master()->describe_backtrace(start_err)+"\n");

  // The workers are started even after a failed startup, as before.
  tStripDemon = thread_create(strip_demon);
  thread_create(check_ft_integrity);

  // NOTE(review): a negative return value presumably keeps the Pike
  // process alive for the worker threads - confirm against the Pike docs.
  return -17;
}
85 
/**
 * Read the fulltext service configuration from an XML file.
 *
 * Every child element of the <config> element is inspected:
 *  - doc_strip: its mime attribute becomes a key of the returned mapping,
 *    the text of its last child the value.
 *  - limits: each child element is stored in mLimits under its tag name,
 *    with the text of its last child converted to an integer.
 *  - anything else is reported on stderr and ignored.
 *
 * @param fname  path of the configuration file
 * @return mapping from mime type to the text of the doc_strip element
 * @throws an error if the file cannot be parsed, has no <config> element,
 *         a doc_strip element lacks the mime attribute, or a doc_strip or
 *         limit element is empty
 */
mapping read_config_file(string fname)
{
  Parser.XML.Tree.Node root = Parser.XML.Tree.parse_file(fname);
  if (!objectp(root))
    error("Failed to parse config file %s\n", fname);

  Parser.XML.Tree.Node config = root->get_first_element("config");
  if (!objectp(config))
    error("No <config> element in config file %s\n", fname);

  // Text of an element's last child.  An empty element has no child, so
  // fail with a clear message instead of indexing the NULL value.
  function(Parser.XML.Tree.Node:string) text_of =
    lambda(Parser.XML.Tree.Node element) {
      Parser.XML.Tree.Node child = element->get_last_child();
      if (!objectp(child))
        error("Empty <%s> element in config file %s\n",
              element->get_tag_name(), fname);
      return child->get_text();
    };

  mapping data = ([]);
  foreach(config->get_elements(), Parser.XML.Tree.Node n)
  {
    mapping attributes = n->get_attributes();
    string tag = n->get_tag_name();
    switch ( tag ) {
    case "doc_strip":
      if ( !stringp(attributes->mime) )
        error("Missing mime attribute for doc_strip in config file!\n");
      data[attributes["mime"]] = text_of(n);
      break;
    case "limits":
      foreach(n->get_elements(), Parser.XML.Tree.Node l) {
        string limit_name = l->get_tag_name();
        int lsz = (int)text_of(l);
        // Tag names are passed as arguments rather than spliced into the
        // format string.
        werror( "[%s] Using Limit: %s = %d\n",
                timelib.log_time(), limit_name, lsz );
        mLimits[limit_name] = lsz;
      }
      break;
    default:
      werror( "[%s] Unknown Tag in Config file : %s\n",
              timelib.log_time(), tag );
    }
  }
  return data;
}
118 
119 
120 };