2 class dbContentReader {
// Looks up the object whose OBJ_PATH attribute matches `path` and hands its
// id on to read_file_from_oid() so the content ends up in `file`.
//
// db   - database handle; only query() is called on it in the visible lines
// path - path value to look for in ob_data
// file - destination object, passed through unchanged
//
// NOTE(review): the embedded listing numbers skip 9, 12, 16-17 and 19-20, so
// the opening brace, the tail of the query expression and whatever encloses
// the werror() call are not visible in this excerpt.
8 void read_file_from_path(object db, string path, object file)
10 array result = db->query(
// The compared value starts with a literal double quote (the \" below).
11 "select ob_id from ob_data where ob_attr='OBJ_PATH' AND ob_data='\""+
// At least one row matched: use the ob_id of the first row.
13 if (sizeof(result)>0) {
14 int oid = result[0]["ob_id"];
15 read_file_from_oid(db, oid, file);
// presumably the no-match branch — the enclosing else is not visible; confirm
18 werror("Unable to retrieve file %s - path not found!\n", path);
// Reads the CONTENT_ID attribute of object `oid` and passes it, cast to int,
// to read_file() together with `file`.
//
// db   - database handle; only query() is called on it in the visible lines
// oid  - object id used in the ob_id comparison
// file - destination object, passed through unchanged
//
// NOTE(review): listing numbers 22, 25-26, 28 and 30-31 are missing, so the
// end of the query expression and the test that chooses between read_file()
// and the werror() message are not visible here.
21 void read_file_from_oid(object db, int oid, object file)
23 array doc_id = db->query(
24 "select ob_data from ob_data where ob_attr='CONTENT_ID' AND ob_id='"+
// The first row's ob_data is taken as the document id.
27 read_file(db, (int)doc_id[0]["ob_data"], file);
// presumably reached when no CONTENT_ID row exists — guard not visible; confirm
29 werror("Unable to retrieve file %O - no Document!\n", oid);
// Determines a path string for object `oid`.
//
// Visible steps: query the object's own OBJ_PATH row; query its 'Environment'
// attribute and parse an id from it; query its 'Creator' attribute and look
// for a workroom attribute on that creator, returning "/home/" + the creator's
// name; otherwise recurse on the environment id and append "/" + the name of
// this object.
//
// db  - database handle; only query() is called on it in the visible lines
// oid - object id, concatenated into the SQL text
//
// NOTE(review): listing numbers 33, 36, 39-40, 42, 45, 48, 50-51, 54-55,
// 57-60 and 62-64 are missing. The conditions that select between the steps
// above, and the declarations of envid, creatorid and p, are therefore not
// visible in this excerpt.
32 string get_path(object db, int oid)
34 array res = db->query("select ob_data from ob_data where ob_id="+
35 oid + " and ob_attr='OBJ_PATH'");
37 array env = db->query("select ob_data from ob_data where ob_id="+
38 oid + " and ob_attr='Environment'");
// The format matches a literal '%' followed by a decimal number.
41 sscanf(env[0]["ob_data"], "%%%d", envid);
43 array creator = db->query("select ob_data from ob_data where ob_id="+oid+" AND ob_attr='Creator'");
44 if (sizeof(creator)>0) {
46 sscanf(creator[0]["ob_data"], "%%%d", creatorid);
// Checks whether the creator has a GROUP_WORKROOM or USER_WORKROOM attribute.
47 array wr = db->query("select ob_data from ob_data where ob_id="+creatorid + " AND (ob_attr='GROUP_WORKROOM' OR ob_attr='USER_WORKROOM')");
49 return "/home/"+get_name(db, creatorid);
// An object whose environment id equals its own id would recurse forever below.
52 else if (envid==oid) {
// NOTE(review): the format contains %d but no argument is passed to werror();
// oid looks like the intended value — confirm and fix.
53 werror("Fatal error: Object %d is in itself!\n");
// Path of the environment, then "/" and this object's own name.
56 return get_path(db, envid) + "/" + get_name(db, oid);
// Extracts the text between the surrounding double quotes of the stored value.
61 sscanf(res[0]["ob_data"], "\"%s\"", p);
// Returns the 'identifier' attribute of object `oid`.
//
// db  - database handle; only query() is called on it in the visible lines
// oid - object id used in the ob_id comparison
//
// NOTE(review): listing numbers 66, 69-70, 72 and 74-75 are missing; the
// declaration of `l` and any empty-result handling are not visible here.
65 string get_name(object db, int oid)
67 array res = db->query("select ob_data from ob_data where ob_id='"+
68 oid + "' and ob_attr='identifier'");
71 string name = res[0]["ob_data"];
// When l is positive the first character and everything after index l-2 are
// dropped; presumably l is the length of name, so this strips the surrounding
// quotes — confirm against the missing line 72.
73 return l>0?name[1..l-2]:name;
// Walks over every object whose class starts with '/classes/Doc', computes
// its path with get_path() and writes that path back into the object's
// OBJ_PATH row, then reports counters on stderr.
//
// db - database handle; query() and quote() are called on it
//
// NOTE(review): listing numbers 77-79, 87, 89 and 93-99 are missing, so the
// declarations and updates of `fixed` and `fail`, and the conditions under
// which the UPDATE statement runs, are not visible in this excerpt.
76 void check_path(object db)
80 array objects = db->query("select ob_id from ob_class where ob_class like '/classes/Doc%'");
81 if (sizeof(objects)>0) {
82 write("Checking path for " + sizeof(objects) + " objects !\n");
83 foreach(objects, mixed obj) {
84 int oid = (int)obj["ob_id"];
// Existing OBJ_PATH row for this object; how `res` is used is not visible.
85 array res = db->query("select ob_data from ob_data where ob_id="+
86 oid + " and ob_attr='OBJ_PATH'");
88 string path = get_path(db, oid);
// The new value is stored wrapped in double quotes, with the path escaped
// through db->quote().
90 db->query("update ob_data SET ob_data='\""+
91 db->quote(path)+"\"' where "+
92 "ob_id="+oid + " AND ob_attr='OBJ_PATH'");
100 werror("Fixed %d Path (%d failed)\n", fixed, fail);
// Builds a relative path string from a numeric content id.
//
// content_id - numeric id; the value 0 makes the function return 0
//
// The path starts as the id formatted as a zero-padded decimal of at least
// five digits. Directory components are then prepended, each one the low byte
// of the remaining id written as two hex digits followed by "/".
//
// NOTE(review): listing numbers 107 and 110-115 are missing, so the loop
// header that controls how many components are prepended, and the final
// return statement, are not visible in this excerpt.
103 string content_id_to_path(int content_id) {
104 if (content_id==0) return 0;
105 string path = sprintf("%05d", content_id);
// The lowest 8 bits of the id are not used for directory components.
106 int tmp_id = content_id >> 8;
108 path = sprintf("%02x/",tmp_id&0xff)+path;
// Move on to the next higher byte.
109 tmp_id = tmp_id >> 8;
// Path-mapping callback with the (db, oid, path) signature that
// read_files_from_path() passes to its mapPathFunction argument.
//
// NOTE(review): listing numbers 119-122 are missing, so what is returned in
// either branch is not visible in this excerpt; db and oid are not used in the
// visible lines.
116 string mapPath(object db, int oid, string path) {
117 string bname=basename(path);
// True when the last path component survives a string -> int -> string round
// trip unchanged, i.e. it is written as a plain integer.
118 if ((string)((int)bname) == bname)
// Path-mapping callback that replaces the object's path with one derived from
// its CONTENT_ID attribute, using content_id_to_path().
//
// db   - database handle; only query() is called on it in the visible lines
// oid  - object id; presumably appended to the query on the missing line 126
// path - not referenced in the visible lines
//
// NOTE(review): listing numbers 126 and 130-136 are missing, so the tail of
// the query expression and the result for objects without a CONTENT_ID row
// are not visible in this excerpt.
123 string mapPathId(object db, int oid, string path) {
124 array result = db->query(
125 "select ob_data from ob_data where ob_attr='CONTENT_ID' AND ob_id="
127 if (sizeof(result)>0) {
128 int cid = (int)result[0]["ob_data"];
129 return content_id_to_path(cid);
// Exports every object whose stored OBJ_PATH begins with `path` into the
// directory named by params->output: document content is written to files,
// containers and rooms are created as directories.
//
// db              - database handle; query() is called on it here
// path            - path prefix used in the LIKE pattern
// params          - option mapping; `output` is read and gets a trailing "/"
//                   appended when it does not already end with one
// mapPathFunction - optional callback (db, oid, path) whose return value
//                   replaces the object's path when it is a function
//
// NOTE(review): listing numbers 138, 141-142, 148, 156, 159-162, 166, 173,
// 176, 180, 186-188, 190, 192 and 194-198 are missing, so several braces,
// branches and expression tails are not visible in this excerpt.
137 void read_files_from_path(object db, string path, mapping params, function mapPathFunction)
139 if (!params->output) {
140 werror("You need to specify an output directory! (--output=)\n");
// NOTE(review): `path` is concatenated into the LIKE pattern as-is, whereas
// check_path() runs its value through db->quote() — confirm whether the
// input is trusted, otherwise quote it here as well.
143 array result = db->query(
144 "select ob_id, ob_data from ob_data where ob_attr='OBJ_PATH' AND "+
145 "ob_data like '\""+path+"%';");
146 if (sizeof(result)>0) {
147 write("Fetching %d Document from Database in Path %s\n", sizeof(result),
149 for (int i = 0; i < sizeof(result); i++) {
150 int oid = (int)result[i]["ob_id"];
151 string p = (string)result[i]["ob_data"];
// The output directory is created (and normalised to end in "/") on every
// iteration.
152 Stdio.mkdirhier(params->output);
153 if (params->output[-1]!='/')
154 params->output += "/";
// Strip the surrounding double quotes from the stored path.
155 sscanf(p, "\"%s\"", p);
157 if (functionp(mapPathFunction)) {
158 p = mapPathFunction(db, oid, p);
// "/versions" and "/annotations" are rewritten to "__versions" and
// "__annotations" in the target name.
163 string name = params->output + replace(p, "/versions", "__versions");
164 name = replace(name, "/annotations", "__annotations");
// Path components of the target name.
165 array directory = name / "/";
167 array classResult = db->query(
168 "select ob_class, obversionof from ob_class where ob_id='"+ oid + "';");
169 if (sizeof(classResult) > 0) {
170 string obclass = classResult[0]["ob_class"];
// search(...)==0 means obclass begins with "/classes/Doc".
// NOTE(review): the query above selects the column `obversionof`, but the
// key read below is "obversion" — looks like a mismatch; confirm.
171 if (search(obclass, "/classes/Doc")==0 &&
172 classResult[0]["obversion"] == 0)
174 array doc_id = db->query(
175 "select ob_data from ob_data where ob_attr='CONTENT_ID' AND ob_id='"+
177 if (sizeof(doc_id)>0) {
// Create the parent directory: every component except the last one.
178 if ( sizeof(directory) > 1 ) {
179 Stdio.mkdirhier(directory[..sizeof(directory)-2] * "/");
// bname is assigned here; its use is not visible in this excerpt.
181 string bname = basename(name);
182 // only integer name in path
183 write("Creating file " + name + "\n");
// Mode "wct": open for writing, create if missing, truncate existing content.
184 Stdio.File f = Stdio.File(name, "wct");
185 read_file(db, (int)doc_id[0]["ob_data"], f);
// Containers and rooms are handled as directories.
189 else if (obclass == "/classes/Container" || obclass == "/classes/Room")
// The statement guarded by this comparison (line 192) is not visible.
191 if (mapPathFunction==mapPathId)
193 Stdio.mkdirhier(name);
// presumably the no-match branch of the sizeof(result) test — confirm
199 werror("Unable to retrieve files in %s - path not found!\n", path);
// Writes the stored content of document `id` to `file`: all rec_data rows of
// doc_data with that doc_id, in rec_order.
//
// db   - database handle; big_query() is called on it
// id   - document id, concatenated into the SQL text
// file - destination object; each row is passed to its write() method
//
// NOTE(review): listing numbers 203, 206-207 and 211-213 are missing. The
// variable `transfer` is not declared in the visible lines of this function —
// presumably declared elsewhere in the class; confirm.
202 void read_file(object db, int id, object file)
204 Sql.sql_result odbData = db->big_query("select rec_data from doc_data "+
205 "where doc_id="+id+" order by rec_order");
// Rows are fetched one at a time; the loop ends when fetch_row() returns a
// false value.
208 while (array line = odbData->fetch_row()) {
// Running total of the lengths of all chunks written so far.
209 transfer+=strlen(line[0]);
210 file->write(line[0]);
214 void main(int argc, array args) {
215 // params are --file= or --oid= or nothing and --output=<directory>
220 mapping params = ([ ]);
221 mapping mimetypes = ([
224 "application/msword": "doc",
225 "application/pdf": "pdf",
228 "text/plain": "text",
230 "image/tiff": "tiff",
231 "application/wnd.ms-powerpoint":"ppt",
232 "application/x-shockwave-flash": "swf",
233 "application/x-gzip":"zip",
234 "application/x-gtar": "gtar",
235 "application/x-tar": "tar",
236 "audio/x-pn-realaudio": "ra",
237 "audio/x-wav": "wav",
239 "video/x-msvideo": "avi",
240 "video/x-ms-wmv": "wmv",
241 "application/vnd.ms-excel": "xls",
242 "source/pike": "pike",
246 "application/x-javascript": "js",
249 params["db"] = "mysql://steam:steam@localhost/steam";
250 for(int i=1; i<argc;i++) {
252 if (sscanf(args[i], "--%s=%s", type, val) >=2)
254 else if (sscanf(args[i], "--%s", type)>=1)
258 Sql.Sql db = Sql.Sql(params->db);
260 if (params["check-path"]) {
266 if (sscanf(params["oid"], "%d", oid)>0) {
267 Stdio.File f = Stdio.File(oid+".file", "wct");
268 read_file(db, oid, f);
273 if (params["file"]) {
274 Stdio.File f = Stdio.File(basename(params->file), "wct");
275 read_file_from_path(db, params["file"], f);
279 if (params["files"]) {
280 if (params["mode"]=="hash")
281 read_files_from_path(db, params->files, params, mapPathId);
283 read_files_from_path(db, params->files, params, mapPath);
284 transfer = transfer / (1024*1024);
285 tt = max(time() - tt, 1);
287 write("-- %d Documents in %d seconds, %d mb, %d mb/s", documents,
288 tt, transfer, transfer/tt);
292 write("Getting DOC IDs ....\n");
293 Sql.sql_result res = db->big_query("select distinct doc_id from doc_data");
294 array doc_ids = allocate(res->num_rows());
295 for(int i=0;i<sizeof(doc_ids);i++)
296 doc_ids[i]=(int)res->fetch_row()[0];
297 write("Found %d Data entries in Database...\n", sizeof(doc_ids));
299 res = db->big_query("select distinct ob_data from ob_data where ob_attr='CONTENT_ID';");
301 array content_ids = allocate(res->num_rows());
302 write("Found %d Documents in Database ...\n", sizeof(content_ids));
303 for (int i=0;i<res->num_rows();i++) {
304 content_ids[i] = (int)res->fetch_row()[0];
306 array unallocated = doc_ids - content_ids;
307 write("There are %d lost entries in the Database!\n", sizeof(unallocated));
309 if (params->output) {
310 write("Saving Files to %s\n", params->output);
311 string dirname = params->output;
312 Stdio.mkdirhier(dirname);
313 if ( dirname[-1] != '/')
315 foreach(unallocated, int docid) {
316 string fname = dirname + docid + ".file";
317 Stdio.File f = Stdio.File(fname, "wct");
318 read_file(db, docid, f);
320 // try to get information
321 Stdio.File outfile = Stdio.File("mimetype.out.tmp", "wct");
322 int PCode = Process.create_process(
323 ({ "file", "-i", fname }),
325 "stdout" : outfile, ])
328 string mimetype, ext;
330 if (sscanf(Stdio.read_file("mimetype.out.tmp"),fname+": %s; %*s",mimetype) ||
331 sscanf(Stdio.read_file("mimetype.out.tmp"),fname+": %s, %*s",mimetype))
333 if (mimetypes[mimetype])
334 ext = mimetypes[mimetype];
336 sscanf(fname, "%s.file", fname);
337 mv(fname+".file", fname + "." + ext);