2 # -----------------------------------------------------------------------
3 # Copyright (C) 2008 Laurentian University
4 # Dan Scott <dscott@laurentian.ca>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 # -----------------------------------------------------------------------
17 # vim:et:sw=4:ts=4: set fileencoding=utf-8 :
20 Parse DTD files and XML files looking for trouble
30 '../../../Open-ILS/web/opac/locale/en-US/',
34 '../../../Open-ILS/xul/staff_client/server/',
35 '../../../Open-ILS/xul/staff_client/chrome/',
36 '../../../Open-ILS/web/reports/',
41 Parse entities files in known places
44 basedir = os.path.normpath(os.path.dirname(os.path.abspath(__file__)))
56 for p_dir in DTD_DIRS:
57 p_dir = os.path.normpath(os.path.join(basedir, p_dir))
58 file_list = os.listdir(p_dir)
59 for d_file in file_list:
60 if os.path.splitext(d_file)[1] == '.dtd':
61 dtd_files.append(os.path.join(p_dir, d_file))
63 prefix = os.path.commonprefix(dtd_files)
65 for d_file in dtd_files:
67 # Get the shortest unique address for this file
68 short_df = d_file[len(prefix):]
70 dtd_file = open(d_file, 'r')
77 # Get rid of trailing linefeed
81 unpack = re.search(r'<!ENTITY\s+(.+?)\s+([\'"])(.*?)\2\s*>', line)
83 print(unpack.groups())
85 # Skip anything other than entity definitions
86 # Note that this makes some massive assumptions:
87 # 1. that we only have one entity defined per line
88 # 2. that we only have single-line entities
89 # 3. that the entity begins in position 0 on the line
90 if not unpack or not line or not line.startswith('<!ENTITY'):
93 # If we did not retrieve an entity and definition, that's probably not good
94 if len(unpack.groups()) != 3:
95 print("%s:%d: No entity defined on line [%s]" % (short_df, line_num, line))
98 entity_key, quote, value = unpack.groups()
100 print(entity_key, value)
102 if not entities.has_key(entity_key):
103 entities[entity_key] = [{'value': value, 'file': short_df}]
106 for entry in entities[entity_key]:
107 if entry['file'] == short_df:
108 print("%s:%d: Duplicate key '%s' in line [%s]" % (short_df, line_num, entity_key, line[0:-1]))
111 entities[entity_key].append({'value': value, 'file': short_df})
def check_files(entities):
    """
    Walk the XML_DIRS trees and check every XUL, HTML, and XHTML file.

    Each directory listed in XML_DIRS is resolved relative to the directory
    that holds this script.  Every file found below it whose extension is
    .xul, .html, or .xhtml is handed to check_xul() along with `entities`.

    entities -- known entities; passed through unchanged to check_xul()

    Returns nothing.
    """

    basedir = os.path.normpath(os.path.dirname(os.path.abspath(__file__)))

    # Extensions of the files that may contain entity references
    checked_extensions = ('.xul', '.html', '.xhtml')

    for x_dir in XML_DIRS:
        for root, _dirs, files in os.walk(os.path.join(basedir, x_dir)):
            for x_file in files:
                # Split the name once instead of once per extension tested
                if os.path.splitext(x_file)[1] in checked_extensions:
                    check_xul(root, x_file, entities)
def check_xul(root, filename, entities):
    """
    Check the entity references in a single XUL/HTML/XHTML file.

    Reads root/filename and reports on standard output:
      * every well-formed entity reference (&name;) whose name is not
        found in `entities`
      * every malformed entity reference, other than numeric character
        references and the lone apostrophe left by URL concatenation

    root     -- directory that contains the file
    filename -- name of the file inside root
    entities -- container of known entity names; only membership is tested

    Returns nothing.
    """

    display_path = os.path.normpath(os.path.join(root, filename))

    # Typical entity usage:
    # &blah.blah.blah_bity.blah;
    strings = re.compile(r'''&([a-zA-Z:_][a-zA-Z0-9:_\-.]+);''')

    # Anything between & and ; that contains a character not allowed in
    # an entity name
    bad_strings = re.compile(r'''&([^a-zA-Z:_]?[a-zA-Z0-9:_]*[^a-zA-Z0-9:_\-.;][a-zA-Z0-9:_\-.]*);''')

    # Match character entities (&#0129; &#x81; etc), which are okay
    char_entity = re.compile(r'''^((#([0-9])+)|(#x([0-9a-fA-F])+))$''')

    # Read the whole file up front; the context manager closes the handle
    # even if reading fails
    with open(os.path.join(root, filename), 'r') as xul:
        content = xul.read()

    # NOTE(review): this banner and the closing count are printed for every
    # file checked; if they were meant to be debug-only output, guard them
    # with the module's debug flag -- confirm against the full file.
    print("File: %s" % (display_path))

    num_strings = 0
    for s_match in strings.finditer(content):
        num_strings += 1
        entity_name = s_match.group(1)
        # `in` works on Python 2 and 3; dict.has_key() is gone in Python 3
        if entity_name not in entities:
            print("File: %s" % (display_path))
            print("\tEntity %s not found, expected in %s" % (entity_name, 'lang.dtd'))

    for s_match in bad_strings.finditer(content):
        candidate = s_match.group(1)
        # Rule out character entities and URL concatenation
        if (not char_entity.search(candidate)) and candidate != "'":
            print("File: %s" % (display_path))
            print("\tBad entity: %s" % (candidate))

    print("\t%d entities found" % (num_strings))
if __name__ == '__main__':
    # Collect the entity definitions, then check the files against them
    check_files(parse_entities())