--- ../zope_static_mirror.py Sun Nov 30 18:17:05 2003 +++ zope_static_mirror.py Fri Dec 19 17:39:26 2003 @@ -58,20 +58,20 @@ # folder in which to store mirror -local_base = 'mirror-2' +local_base = 'mirror' -domain = 'example.com' +domain = 'localhost:8080' # 2 lists of tuples: (subdomain name, remote and local folder name) # this one maps between subdomains and the folders they are stored in # list all domains here which should be fixed in any document -fixup_domains = [('', ''), ('www', ''), ('staff', 'staff'), ('students', 'students'), ('daily', 'news'), ('wiki', 'wiki')] +fixup_domains = [('', '')] # this also maps between subdomains and the folders they are stored in # list those here that you want to actually download and mirror # notice that in my case, www.example.com and example.com both point # to the same content and live in a folder called www on the server # but live in the local_base folder in the mirror -domains = [('', ''), ('staff', 'staff'), ('students', 'students'), ('daily', 'news'), ('wiki', 'wiki')] +domains = [('', '')] # These are here in case I just want to update one section #domains = [('', '')] @@ -83,9 +83,13 @@ # control program operation next_file_delay = 0 # seconds -maximum_depth = 0 +maximum_depth = 20 force_doc_update = 0 -fixup_only = 1 +fixup_only = 0 + +if len(sys.argv) > 1: + print 'Force doc update: ', sys.argv[1] + force_doc_update = 1 local_file_temp_extension = '._original_' @@ -104,9 +108,9 @@ file_types = ['Image', 'File'] # These are objects usually acquired from the root directory -force_toplevel_folders = ['images', 'contact'] +force_toplevel_folders = ['images'] force_toplevel_files = ['favicon.ico'] -force_toplevel_docs = ['new'] +force_toplevel_docs = ['images.html']#'new'] # Date style should match that used on the server side cur_date = time.strftime('%Y-%m-%d %H:%M') @@ -127,6 +131,11 @@ if a[0] == 'src': self.link_list.append(a[1]) + def start_td(self, attributes): + for a in attributes: + if a[0] == 'background': + self.link_list.append(a[1]) + def start_body(self, attributes): for a in attributes: if a[0] == 'background': @@ -174,6 +183,7 @@ self.mirror_folder_zope(full_domain, folder, depth=1) print 'done' print + def mirror_folder(self, full_domain, local_folder, depth=1): if maximum_depth and depth > maximum_depth: @@ -192,7 +202,10 @@ mod_dates = get_saved_mod_dates(saved_file_list) #print folder, 'files', file_list for a_file, mod_date in file_list: - local_file = get_full_path(local_base, local_folder, a_file) + b_file = a_file + if a_file == 'index_html': + b_file = 'index.html' + local_file = get_full_path(local_base, local_folder, b_file) exists = os.path.exists(local_file) url = 'http://%s/%s/%s' % (full_domain, remote_folder, a_file) if not exists or (mod_dates.has_key(a_file) and mod_date > mod_dates[a_file]): @@ -203,7 +216,7 @@ data = readURL(url) f.write(data) else: - print 'up-to-date %s/%s' % (local_folder, a_file) + print 'up-to-date %s/%s' % (local_folder, b_file) save_a_list(file_list, self.local_base, local_folder, '.save_file_list') saved_doc_list = load_a_list(self.local_base, local_folder, '.save_doc_list') @@ -215,9 +228,13 @@ saved_mod_dates = get_saved_mod_dates(saved_doc_list) #print 'mod_dates', mod_dates for doc, mod_date in doc_list: - local_file = get_full_path(self.local_base, local_folder, doc) + b_doc = doc + if doc == 'index_html': + b_doc = 'index.html' + local_file = get_full_path(self.local_base, local_folder, b_doc) local_file_temp = local_file + local_file_temp_extension exists = os.path.exists(local_file_temp) + if force_doc_update or not exists or (saved_mod_dates.has_key(doc) and mod_date > saved_mod_dates[doc]): if exists and doc == 'index_html' and mod_date == cur_date and not force_doc_update: print 'not updating index_html' @@ -242,8 +259,8 @@ f_temp.close() print '%s/%s' % (local_folder, doc), print 'fixing', - f = file(local_file, 'w') data = self.fixup(data, depth) + f = file(local_file, 'w') f.write(data) f.close() print 'done' @@ -263,7 +280,7 @@ change_to = '../'*self.depth if self.local_folder: change_to += self.local_folder+'/' - r = '%s="%sindex_html"' % (matchobj.group(1), change_to) + r = '%s="%sindex.html"' % (matchobj.group(1), change_to) print 'undomain', r return r @@ -284,15 +301,15 @@ change_from = "http://%s/" % full_domain if self.local_folder == '': if local_folder == '': - change_to = '%s%s/' % ('../' * (depth), self.local_base) + change_to = '%s' % ('../' * (depth-1)) else: - change_to = '%s%s/%s/' % ('../' * (depth), self.local_base, local_folder) + change_to = '%s%s/' % ('../' * (depth-1), local_folder) else: if local_folder == '': - change_to = '%s%s/' % ('../' * (depth+1), self.local_base) + change_to = '%s' % ('../' * (depth)) else: - change_to = '%s%s/%s/' % ('../' * (depth+1), self.local_base, local_folder) + change_to = '%s%s/' % ('../' * (depth), local_folder) data = data.replace(change_from, change_to) change_from = "http://%s" % full_domain @@ -334,15 +351,17 @@ doc_list = load_a_list(self.local_base, local_folder, '.save_doc_list') for doc, mod_date in doc_list: - print '%s/%s fixup_zope' % (pathname, doc), - local_file = get_full_path(self.local_base, local_folder, doc) + b_doc = doc + if doc == 'index_html': + b_doc = 'index.html' + print '%s/%s fixup_zope' % (pathname, b_doc), + local_file = get_full_path(self.local_base, local_folder, b_doc) f = file(local_file, 'r+') data = f.read() + data = self.fixup_zope(data, depth) + f.seek(0) f.truncate() - - data = self.fixup_zope(data, depth) - f.write(data) f.close() print 'done' @@ -364,18 +383,14 @@ doc = matchobj.group(2)[1:] if self.local_folder: - pathname = '%s/%s/%s' % (self.local_base, self.local_folder, doc) + pathname = '/%s/%s' % (self.local_folder, doc) else: - pathname = '%s/%s' % (self.local_base, doc) + pathname = '%s' % (doc) - change_to = '../'*self.depth - if self.local_folder: - change_to += self.local_folder+'/' - else: - change_to += self.local_base+'/' + change_to = '../'*(self.depth-1) if os.path.isdir(pathname): - doc += '/index_html' + doc += '/index.html' r = '%s="%s%s"' % (matchobj.group(1), change_to, doc) print 'unroot', r @@ -398,7 +413,7 @@ def add_index_html(self, matchobj): doc = matchobj.group(2) - doc += '/index_html' + doc += '/index.html' r = '%s="%s"' % (matchobj.group(1), doc) print 'add_index', r @@ -427,7 +442,7 @@ def add_index_html(matchobj): #print 'group', matchobj.group(), matchobj.group(1), matchobj.group(2) - return '%s="%sindex_html"' % (matchobj.group(1), matchobj.group(2)) + return '%s="%sindex.html"' % (matchobj.group(1), matchobj.group(2)) def list_all(full_domain, folder):