| Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This is a set of validation checks that can be performed on translation
23 units.
24
25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
26 and derivatives of TranslationChecker (like StandardChecker) check
27 (source, target) translation pairs.
28
29 When adding a new test here, please document and explain the behaviour on the
30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
31 """
32
33 from translate.filters import helpers
34 from translate.filters import decoration
35 from translate.filters import prefilters
36 from translate.filters import spelling
37 from translate.lang import factory
38 from translate.lang import data
39 # The import of xliff could fail if the user doesn't have lxml installed. For
40 # now we try to continue gracefully to help users who aren't interested in
41 # support for XLIFF or other XML formats.
42 try:
43 from translate.storage import xliff
44 except ImportError, e:
45 xliff = None
46 import re
47
48 # These are some regular expressions that are compiled for use in some tests
49
50 # printf syntax based on http://en.wikipedia.org/wiki/Printf, which doesn't cover everything; we leave \w instead of specifying the exact letters as
51 # this should capture printf types defined in other platforms.
52 printf_pat = re.compile('%((?:(?P<ord>\d+)\$)*(?P<fullvar>[+#-]*(?:\d+)*(?:\.\d+)*(hh\|h\|l\|ll)*(?P<type>[\w%])))')
53
54 # The name of the XML tag
55 tagname_re = re.compile("<[\s]*([\w\/]*)")
56
57 # We allow escaped quotes, probably for old escaping style of OOo helpcontent
58 #TODO: remove escaped strings once usage is audited
59 property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))")
60
61 # The whole tag
62 tag_re = re.compile("<[^>]+>")
63
65 """Returns the name of the XML/HTML tag in string"""
66 return tagname_re.match(string).groups(1)[0]
67
69 """Tests to see if pair == (a,b,c) is in list, but handles None entries in
70 list as wildcards (only allowed in positions "a" and "c"). We take a shortcut
71 by only considering "c" if "b" has already matched."""
72 a, b, c = pair
73 if (b, c) == (None, None):
74 #This is a tagname
75 return pair
76 for pattern in list:
77 x, y, z = pattern
78 if (x, y) in [(a, b), (None, b)]:
79 if z in [None, c]:
80 return pattern
81 return pair
82
84 """Returns all the properties in the XML/HTML tag string as
85 (tagname, propertyname, propertyvalue), but ignore those combinations
86 specified in ignore."""
87 properties = []
88 for string in strings:
89 tag = tagname(string)
90 properties += [(tag, None, None)]
91 #Now we isolate the attribute pairs.
92 pairs = property_re.findall(string)
93 for property, value, a, b in pairs:
94 #Strip the quotes:
95 value = value[1:-1]
96
97 canignore = False
98 if (tag, property, value) in ignore or \
99 intuplelist((tag,property,value), ignore) != (tag,property,value):
100 canignore = True
101 break
102 if not canignore:
103 properties += [(tag, property, value)]
104 return properties
105
106
108 """This exception signals that a Filter didn't pass, and gives an explanation
109 or a comment"""
120
122 """This exception signals that a Filter didn't pass, and the bad translation
123 might break an application (so the string will be marked fuzzy)"""
124 pass
125
126 #(tag, attribute, value) specifies a certain attribute which can be changed/
127 #ignored if it exists inside tag. The third element of the tuple, when it is
128 #not None, names a property value that can be ignored if present (like
129 #defaults, for example).
130 #A None entry acts as a wildcard: it matches every tag (first position) or
131 #every attribute value (third position). A non-None "value" means that the
132 #value of the attribute must be taken into account.
133 common_ignoretags = [(None, "xml-lang", None)]
134 common_canchangetags = [("img", "alt", None)]
135
137 """object representing the configuration of a checker"""
def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
             notranslatewords=None, musttranslatewords=None, validchars=None,
             punctuation=None, endpunctuation=None, ignoretags=None,
             canchangetags=None, criticaltests=None, credit_sources=None):
    """Build a checker configuration from the given settings.

    Every argument is optional.  List-like arguments left as None are
    replaced by a fresh empty list, because a mutable default argument
    would be shared between calls.  notranslatewords and
    musttranslatewords are stored as dictionaries keyed by the
    data.forceunicode() form of each word.  punctuation and
    endpunctuation fall back to the target language's punctuation and
    sentenceend attributes; ignoretags and canchangetags fall back to
    the module-level common_ignoretags and common_canchangetags lists
    (the same list objects, not copies).
    """
    # The language objects are set up first; the punctuation fallbacks
    # further down read self.lang, which updatetargetlanguage() provides.
    self.targetlanguage = targetlanguage
    self.updatetargetlanguage(targetlanguage)
    self.sourcelang = factory.getlanguage('en')

    if accelmarkers is None:
        self.accelmarkers = []
    else:
        self.accelmarkers = accelmarkers
    if varmatches is None:
        self.varmatches = []
    else:
        self.varmatches = varmatches

    # TODO: allow user configuration of untranslatable words
    self.notranslatewords = {}
    if notranslatewords is not None:
        for word in notranslatewords:
            self.notranslatewords[data.forceunicode(word)] = None
    self.musttranslatewords = {}
    if musttranslatewords is not None:
        for word in musttranslatewords:
            self.musttranslatewords[data.forceunicode(word)] = None

    self.validcharsmap = {}
    self.updatevalidchars(data.forceunicode(validchars))

    self.punctuation = data.forceunicode(punctuation)
    if self.punctuation is None:
        self.punctuation = self.lang.punctuation
    self.endpunctuation = data.forceunicode(endpunctuation)
    if self.endpunctuation is None:
        self.endpunctuation = self.lang.sentenceend

    self.ignoretags = common_ignoretags
    if ignoretags is not None:
        self.ignoretags = ignoretags
    self.canchangetags = common_canchangetags
    if canchangetags is not None:
        self.canchangetags = canchangetags

    if criticaltests is None:
        self.criticaltests = []
    else:
        self.criticaltests = criticaltests
    if credit_sources is None:
        self.credit_sources = []
    else:
        self.credit_sources = credit_sources
184
186 """combines the info in otherconfig into this config object"""
187 self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
188 self.updatetargetlanguage(self.targetlanguage)
189 self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
190 self.varmatches.extend(otherconfig.varmatches)
191 self.notranslatewords.update(otherconfig.notranslatewords)
192 self.musttranslatewords.update(otherconfig.musttranslatewords)
193 self.validcharsmap.update(otherconfig.validcharsmap)
194 self.punctuation += otherconfig.punctuation
195 self.endpunctuation += otherconfig.endpunctuation
196 #TODO: consider also updating in the following cases:
197 self.ignoretags = otherconfig.ignoretags
198 self.canchangetags = otherconfig.canchangetags
199 self.criticaltests.extend(otherconfig.criticaltests)
200 self.credit_sources = otherconfig.credit_sources
201
203 """updates the map that eliminates valid characters"""
204 if validchars is None:
205 return True
206 validcharsmap = dict([(ord(validchar), None) for validchar in data.forceunicode(validchars)])
207 self.validcharsmap.update(validcharsmap)
208
210 """Updates the target language in the config to the given target language"""
211 self.lang = factory.getlanguage(langcode)
212
214 """Parent Checker class which does the checking based on functions available
215 in derived classes."""
216 preconditions = {}
217
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Set up the checker's configuration, helper table and default filters.

    A new CheckerConfig is created when checkerconfig is None.  The
    excludefilters and limitfilters arguments are passed unchanged to
    getfilters(), whose result is stored as self.defaultfilters.
    """
    self.errorhandler = errorhandler
    config = checkerconfig
    if config is None:
        config = CheckerConfig()
    self.setconfig(config)
    # Every callable attribute name found on UnitChecker itself is recorded
    # as a helper so that getfilters() does not treat it as a test.
    helper_map = {}
    self.helperfunctions = helper_map
    for name in dir(UnitChecker):
        member = getattr(self, name)
        if callable(member):
            helper_map[name] = member
    self.defaultfilters = self.getfilters(excludefilters, limitfilters)
231
233 """returns dictionary of available filters, including/excluding those in
234 the given lists"""
235 filters = {}
236 if limitfilters is None:
237 # use everything available unless instructed
238 limitfilters = dir(self)
239 if excludefilters is None:
240 excludefilters = {}
241 for functionname in limitfilters:
242 if functionname in excludefilters: continue
243 if functionname in self.helperfunctions: continue
244 if functionname == "errorhandler": continue
245 filterfunction = getattr(self, functionname, None)
246 if not callable(filterfunction): continue
247 filters[functionname] = filterfunction
248 return filters
249
251 """sets the accelerator list"""
252 self.config = config
253 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
254 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
255 for startmatch, endmatch in self.config.varmatches]
256 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone)
257 for startmatch, endmatch in self.config.varmatches]
258
260 """Sets the filename that a checker should use for evaluating suggestions."""
261 self.suggestion_store = store
262
266
270
274
276 """replaces words with punctuation with their unpunctuated equivalents"""
277 return prefilters.filterwordswithpunctuation(str1)
278
282
284 """Runs the given test on the given unit.
285
286 Note that this can raise a FilterFailure as part of normal operation"""
287 return test(unit)
288
290 """run all the tests in this suite, return failures as testname, message_or_exception"""
291 failures = {}
292 ignores = self.config.lang.ignoretests[:]
293 functionnames = self.defaultfilters.keys()
294 priorityfunctionnames = self.preconditions.keys()
295 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
296 for functionname in priorityfunctionnames + otherfunctionnames:
297 if functionname in ignores:
298 continue
299 filterfunction = getattr(self, functionname, None)
300 # this filterfunction may only be defined on another checker if using TeeChecker
301 if filterfunction is None:
302 continue
303 filtermessage = filterfunction.__doc__
304 try:
305 filterresult = self.run_test(filterfunction, unit)
306 except FilterFailure, e:
307 filterresult = False
308 filtermessage = str(e).decode('utf-8')
309 except Exception, e:
310 if self.errorhandler is None:
311 raise ValueError("error in filter %s: %r, %r, %s" % \
312 (functionname, unit.source, unit.target, e))
313 else:
314 filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
315 if not filterresult:
316 # we test some preconditions that aren't actually a cause for failure
317 if functionname in self.defaultfilters:
318 failures[functionname] = filtermessage
319 if functionname in self.preconditions:
320 for ignoredfunctionname in self.preconditions[functionname]:
321 ignores.append(ignoredfunctionname)
322 return failures
323
325 """A checker that passes source and target strings to the checks, not the
326 whole unit.
327
328 This provides some speedup and simplifies testing."""
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Initialise the checker by delegating every argument to the parent class."""
    parent = super(TranslationChecker, self)
    parent.__init__(checkerconfig, excludefilters, limitfilters, errorhandler)
331
333 """Runs the given test on the given unit.
334
335 Note that this can raise a FilterFailure as part of normal operation."""
336 if self.hasplural:
337 for pluralform in unit.target.strings:
338 if not test(self.str1, pluralform):
339 return False
340 else:
341 return True
342 else:
343 return test(self.str1, self.str2)
344
346 """Do some optimisation by caching some data of the unit for the benefit
347 of run_test()."""
348 self.str1 = data.forceunicode(unit.source)
349 self.str2 = data.forceunicode(unit.target)
350 self.hasplural = unit.hasplural()
351 return super(TranslationChecker, self).run_filters(unit)
352
354 """A Checker that controls multiple checkers."""
355 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
356 checkerclasses=None, errorhandler=None, languagecode=None):
357 """construct a TeeChecker from the given checkers"""
358 self.limitfilters = limitfilters
359 if checkerclasses is None:
360 checkerclasses = [StandardChecker]
361 self.checkers = [checkerclass(checkerconfig=checkerconfig, excludefilters=excludefilters, limitfilters=limitfilters, errorhandler=errorhandler) for checkerclass in checkerclasses]
362 if languagecode:
363 for checker in self.checkers:
364 checker.config.updatetargetlanguage(languagecode)
365 # Let's hook up the language specific checker
366 lang_checker = self.checkers[0].config.lang.checker
367 if lang_checker:
368 self.checkers.append(lang_checker)
369
370 self.combinedfilters = self.getfilters(excludefilters, limitfilters)
371 self.config = checkerconfig or self.checkers[0].config
372
374 """returns dictionary of available filters, including/excluding those in
375 the given lists"""
376 if excludefilters is None:
377 excludefilters = {}
378 filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
379 self.combinedfilters = {}
380 for filters in filterslist:
381 self.combinedfilters.update(filters)
382 # TODO: move this somewhere more sensible (a checkfilters method?)
383 if limitfilters is not None:
384 for filtername in limitfilters:
385 if not filtername in self.combinedfilters:
386 import sys
387 print >> sys.stderr, "warning: could not find filter %s" % filtername
388 return self.combinedfilters
389
391 """run all the tests in the checker's suites"""
392 failures = {}
393 for checker in self.checkers:
394 failures.update(checker.run_filters(unit))
395 return failures
396
398 """Sets the filename that a checker should use for evaluating suggestions."""
399 for checker in self.checkers:
400 checker.setsuggestionstore(store)
401
402
404 """The basic test suite for source -> target translations."""
406 """checks whether a string has been translated at all"""
407 str2 = prefilters.removekdecomments(str2)
408 return not (len(str1.strip()) > 0 and len(str2) == 0)
409
411 """checks whether a translation is basically identical to the original string"""
412 str1 = self.filteraccelerators(str1)
413 str2 = self.filteraccelerators(str2)
414 if len(str1.strip()) == 0:
415 return True
416 if str1.isupper() and str1 == str2:
417 return True
418 if self.config.notranslatewords:
419 words1 = str1.split()
420 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]:
421 return True
422 str1 = self.removevariables(str1)
423 str2 = self.removevariables(str2)
424 if not (str1.strip().isdigit() or len(str1) < 2 or decoration.ispurepunctuation(str1.strip())) and (str1.strip().lower() == str2.strip().lower()):
425 raise FilterFailure("please translate")
426 return True
427
429 """checks whether a translation only contains spaces"""
430 len1 = len(str1.strip())
431 len2 = len(str2.strip())
432 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
433
435 """checks whether a translation is much shorter than the original string"""
436 len1 = len(str1.strip())
437 len2 = len(str2.strip())
438 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
439
441 """checks whether a translation is much longer than the original string"""
442 len1 = len(str1.strip())
443 len2 = len(str2.strip())
444 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
445
447 """checks whether escaping is consistent between the two strings"""
448 if not helpers.countsmatch(str1, str2, ("\\", "\\\\")):
449 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if "\\" in word])
450 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if "\\" in word])
451 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
452 else:
453 return True
454
456 """checks whether newlines are consistent between the two strings"""
457 if not helpers.countsmatch(str1, str2, ("\n", "\r")):
458 raise FilterFailure("line endings in original don't match line endings in translation")
459 else:
460 return True
461
463 """checks whether tabs are consistent between the two strings"""
464 if not helpers.countmatch(str1, str2, "\t"):
465 raise SeriousFilterFailure("tabs in original don't match tabs in translation")
466 else:
467 return True
468
470 """checks whether singlequoting is consistent between the two strings"""
471 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1)))
472 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2)))
473 return helpers.countsmatch(str1, str2, ("'", "''", "\\'"))
474
476 """checks whether doublequoting is consistent between the two strings"""
477 str1 = self.filteraccelerators(self.filtervariables(str1))
478 str1 = self.filterxml(str1)
479 str1 = self.config.lang.punctranslate(str1)
480 str2 = self.filteraccelerators(self.filtervariables(str2))
481 str2 = self.filterxml(str2)
482 return helpers.countsmatch(str1, str2, ('"', '""', '\\"', u"«", u"»"))
483
485 """checks for bad double-spaces by comparing to original"""
486 str1 = self.filteraccelerators(str1)
487 str2 = self.filteraccelerators(str2)
488 return helpers.countmatch(str1, str2, " ")
489
491 """checks for bad spacing after punctuation"""
492 if str1.find(u" ") == -1:
493 return True
494 str1 = self.filteraccelerators(self.filtervariables(str1))
495 str1 = self.config.lang.punctranslate(str1)
496 str2 = self.filteraccelerators(self.filtervariables(str2))
497 for puncchar in self.config.punctuation:
498 plaincount1 = str1.count(puncchar)
499 plaincount2 = str2.count(puncchar)
500 if not plaincount1 or plaincount1 != plaincount2:
501 continue
502 spacecount1 = str1.count(puncchar+" ")
503 spacecount2 = str2.count(puncchar+" ")
504 if spacecount1 != spacecount2:
505 # handle extra spaces that are because of transposed punctuation
506 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1:
507 continue
508 return False
509 return True
510
512 """checks whether printf format strings match"""
513 count1 = count2 = None
514 for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
515 count2 = var_num2 + 1
516 if match2.group('ord'):
517 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
518 count1 = var_num1 + 1
519 if int(match2.group('ord')) == var_num1 + 1:
520 if match2.group('fullvar') != match1.group('fullvar'):
521 return 0
522 else:
523 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
524 count1 = var_num1 + 1
525 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
526 return 0
527
528 if count2 is None:
529 if list(printf_pat.finditer(str1)):
530 return 0
531
532 if (count1 or count2) and (count1 != count2):
533 return 0
534 return 1
535
537 """checks whether accelerators are consistent between the two strings"""
538 str1 = self.filtervariables(str1)
539 str2 = self.filtervariables(str2)
540 messages = []
541 for accelmarker in self.config.accelmarkers:
542 counter = decoration.countaccelerators(accelmarker)
543 count1, countbad1 = counter(str1)
544 count2, countbad2 = counter(str2)
545 getaccel = decoration.getaccelerators(accelmarker)
546 accel2, bad2 = getaccel(str2)
547 if count1 == count2:
548 continue
549 if count1 == 1 and count2 == 0:
550 if countbad2 == 1:
551 messages.append("accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
552 else:
553 messages.append("accelerator %s is missing from translation" % accelmarker)
554 elif count1 == 0:
555 messages.append("accelerator %s does not occur in original and should not be in translation" % accelmarker)
556 elif count1 == 1 and count2 > count1:
557 messages.append("accelerator %s is repeated in translation" % accelmarker)
558 else:
559 messages.append("accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
560 if messages:
561 if "accelerators" in self.config.criticaltests:
562 raise SeriousFilterFailure(messages)
563 else:
564 raise FilterFailure(messages)
565 return True
566
567 # def acceleratedvariables(self, str1, str2):
568 # """checks that no variables are accelerated"""
569 # messages = []
570 # for accelerator in self.config.accelmarkers:
571 # for variablestart, variableend in self.config.varmatches:
572 # error = accelerator + variablestart
573 # if str1.find(error) >= 0:
574 # messages.append("original has an accelerated variable")
575 # if str2.find(error) >= 0:
576 # messages.append("translation has an accelerated variable")
577 # if messages:
578 # raise FilterFailure(messages)
579 # return True
580
582 """checks whether variables of various forms are consistent between the two strings"""
583 messages = []
584 mismatch1, mismatch2 = [], []
585 varnames1, varnames2 = [], []
586 for startmarker, endmarker in self.config.varmatches:
587 varchecker = decoration.getvariables(startmarker, endmarker)
588 if startmarker and endmarker:
589 if isinstance(endmarker, int):
590 redecorate = lambda var: startmarker + var
591 else:
592 redecorate = lambda var: startmarker + var + endmarker
593 elif startmarker:
594 redecorate = lambda var: startmarker + var
595 else:
596 redecorate = lambda var: var
597 vars1 = varchecker(str1)
598 vars2 = varchecker(str2)
599 if vars1 != vars2:
600 # we use counts to compare so we can handle multiple variables
601 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
602 # filter variable names we've already seen, so they aren't matched by more than one filter...
603 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
604 varnames1.extend(vars1)
605 varnames2.extend(vars2)
606 vars1 = map(redecorate, vars1)
607 vars2 = map(redecorate, vars2)
608 mismatch1.extend(vars1)
609 mismatch2.extend(vars2)
610 if mismatch1:
611 messages.append("do not translate: %s" % ", ".join(mismatch1))
612 elif mismatch2:
613 messages.append("translation contains variables not in original: %s" % ", ".join(mismatch2))
614 if messages and mismatch1:
615 raise SeriousFilterFailure(messages)
616 elif messages:
617 raise FilterFailure(messages)
618 return True
619
621 """checks that function names are not translated"""
622 return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
623
625 """checks that emails are not translated"""
626 return helpers.funcmatch(str1, str2, decoration.getemails)
627
629 """checks that URLs are not translated"""
630 return helpers.funcmatch(str1, str2, decoration.geturls)
631
633 """checks whether numbers of various forms are consistent between the two strings"""
634 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
635
637 """checks whether whitespace at the beginning of the strings matches"""
638 str1 = self.filteraccelerators(self.filtervariables(str1))
639 str2 = self.filteraccelerators(self.filtervariables(str2))
640 return helpers.funcmatch(str1, str2, decoration.spacestart)
641
643 """checks whether whitespace at the end of the strings matches"""
644 str1 = self.filteraccelerators(self.filtervariables(str1))
645 str2 = self.filteraccelerators(self.filtervariables(str2))
646 return helpers.funcmatch(str1, str2, decoration.spaceend)
647
649 """checks whether punctuation at the beginning of the strings match"""
650 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
651 str1 = self.config.lang.punctranslate(str1)
652 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
653 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
654
656 """checks whether punctuation at the end of the strings match"""
657 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
658 str1 = self.config.lang.punctranslate(str1)
659 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
660 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation)
661
663 """checks that strings that are purely punctuation are not changed"""
664 # this test is a subset of startandend
665 if (decoration.ispurepunctuation(str1)):
666 return str1 == str2
667 else:
668 return not decoration.ispurepunctuation(str2)
669
671 """checks that the number of brackets in both strings match"""
672 str1 = self.filtervariables(str1)
673 str2 = self.filtervariables(str2)
674 messages = []
675 missing = []
676 extra = []
677 for bracket in ("[", "]", "{", "}", "(", ")"):
678 count1 = str1.count(bracket)
679 count2 = str2.count(bracket)
680 if count2 < count1:
681 missing.append("'%s'" % bracket)
682 elif count2 > count1:
683 extra.append("'%s'" % bracket)
684 if missing:
685 messages.append("translation is missing %s" % ", ".join(missing))
686 if extra:
687 messages.append("translation has extra %s" % ", ".join(extra))
688 if messages:
689 raise FilterFailure(messages)
690 return True
691
693 """checks that the number of sentences in both strings match"""
694 sentences1 = len(self.config.sourcelang.sentences(str1))
695 sentences2 = len(self.config.lang.sentences(str2))
696 if not sentences1 == sentences2:
697 raise FilterFailure("The number of sentences differ: %d versus %d" % (sentences1, sentences2))
698 return True
699
701 """checks that options are not translated"""
702 str1 = self.filtervariables(str1)
703 for word1 in str1.split():
704 if word1 != "--" and word1.startswith("--") and word1[-1].isalnum():
705 parts = word1.split("=")
706 if not parts[0] in str2:
707 raise FilterFailure("The option %s does not occur or is translated in the translation." % parts[0])
708 if len(parts) > 1 and parts[1] in str2:
709 raise FilterFailure("The parameter %(param)s in option %(option)s is not translated." % {"param": parts[0], "option": parts[1]})
710 return True
711
713 """checks that the message starts with the correct capitalisation"""
714 str1 = self.filteraccelerators(str1)
715 str2 = self.filteraccelerators(str2)
716 if len(str1) > 1 and len(str2) > 1:
717 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2)
718 if len(str1) == 0 and len(str2) == 0:
719 return True
720 if len(str1) == 0 or len(str2) == 0:
721 return False
722 return True
723
725 """checks the capitalisation of two strings isn't wildly different"""
726 str1 = self.removevariables(str1)
727 str2 = self.removevariables(str2)
728 # TODO: review this. The 'I' is specific to English, so it probably serves
729 # no purpose to get sourcelang.sentenceend
730 str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, " i ", str1)
731 capitals1 = helpers.filtercount(str1, type(str1).isupper)
732 capitals2 = helpers.filtercount(str2, type(str2).isupper)
733 alpha1 = helpers.filtercount(str1, type(str1).isalpha)
734 alpha2 = helpers.filtercount(str2, type(str2).isalpha)
735 # Capture the all caps case
736 if capitals1 == alpha1:
737 return capitals2 == alpha2
738 # some heuristic tests to try and see that the style of capitals is vaguely the same
739 if capitals1 == 0 or capitals1 == 1:
740 return capitals2 == capitals1
741 elif capitals1 < len(str1) / 10:
742 return capitals2 < len(str2) / 8
743 elif len(str1) < 10:
744 return abs(capitals1 - capitals2) < 3
745 elif capitals1 > len(str1) * 6 / 10:
746 return capitals2 > len(str2) * 6 / 10
747 else:
748 return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
749
751 """checks that acronyms that appear are unchanged"""
752 acronyms = []
753 allowed = []
754 for startmatch, endmatch in self.config.varmatches:
755 allowed += decoration.getvariables(startmatch, endmatch)(str1)
756 allowed += self.config.musttranslatewords.keys()
757 str1 = self.filteraccelerators(self.filtervariables(str1))
758 iter = self.config.lang.word_iter(str1)
759 str2 = self.filteraccelerators(self.filtervariables(str2))
760 for word in iter:
761 if word.isupper() and len(word) > 1 and word not in allowed:
762 if str2.find(word) == -1:
763 acronyms.append(word)
764 if acronyms:
765 raise FilterFailure("acronyms should not be translated: " + ", ".join(acronyms))
766 return True
767
769 """checks for repeated words in the translation"""
770 lastword = ""
771 without_newlines = "\n".join(str2.split("\n"))
772 words = self.filteraccelerators(self.removevariables(without_newlines)).replace(".", "").lower().split()
773 for word in words:
774 if word == lastword:
775 raise FilterFailure("The word '%s' is repeated" % word)
776 lastword = word
777 return True
778
780 """checks that words configured as untranslatable appear in the translation too"""
781 if not self.config.notranslatewords:
782 return True
783 str1 = self.filtervariables(str1)
784 str2 = self.filtervariables(str2)
785 #The above is full of strange quotes and things in utf-8 encoding.
786 #single apostrophe perhaps problematic in words like "doesn't"
787 for seperator in self.config.punctuation:
788 if isinstance(str1, unicode):
789 str1 = str1.replace(seperator, u" ")
790 else:
791 str1 = str1.replace(seperator.encode("utf-8"), " ")
792 if isinstance(str2, unicode):
793 str2 = str2.replace(seperator, u" ")
794 else:
795 str2 = str2.replace(seperator.encode("utf-8"), " ")
796 words1 = self.filteraccelerators(str1).split()
797 words2 = self.filteraccelerators(str2).split()
798 stopwords = [word for word in words1 if word in self.config.notranslatewords and word not in words2]
799 if stopwords:
800 raise FilterFailure("do not translate: %s" % (", ".join(stopwords)))
801 return True
802
804 """checks that words configured as definitely translatable don't appear in
805 the translation"""
806 if not self.config.musttranslatewords:
807 return True
808 str1 = self.removevariables(str1)
809 str2 = self.removevariables(str2)
810 #The above is full of strange quotes and things in utf-8 encoding.
811 #single apostrophe perhaps problematic in words like "doesn't"
812 for seperator in self.config.punctuation:
813 str1 = str1.replace(seperator, " ")
814 str2 = str2.replace(seperator, " ")
815 words1 = self.filteraccelerators(str1).split()
816 words2 = self.filteraccelerators(str2).split()
817 stopwords = [word for word in words1 if word in self.config.musttranslatewords and word in words2]
818 if stopwords:
819 raise FilterFailure("please translate: %s" % (", ".join(stopwords)))
820 return True
821
def validchars(self, str1, str2):
    """checks that only characters specified as valid appear in the translation

    Both strings are passed through the configured validcharsmap; whatever is
    left of str2 that is not also left of str1 is reported.

    @return: True when no map is configured or nothing new is left over
    @raise FilterFailure: listing each offending character and its code point
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    sourceleft = str1.translate(charmap)
    targetleft = str2.translate(charmap)
    invalidchars = []
    for char in targetleft:
        # characters that also survive in the source are tolerated
        if char in sourceleft:
            continue
        invalidchars.append("'%s' (\\u%04x)" % (char.encode('utf-8'), ord(char)))
    if not invalidchars:
        return True
    raise FilterFailure("invalid chars: %s" % (", ".join(invalidchars)))
832
def filepaths(self, str1, str2):
    """checks that file paths have not been translated

    Every word of the accelerator-filtered source that starts with "/" is
    handed to helpers.countsmatch together with both strings.

    @return: False as soon as countsmatch rejects one such word, else True
    """
    for candidate in self.filteraccelerators(str1).split():
        if not candidate.startswith("/"):
            continue
        if not helpers.countsmatch(str1, str2, (candidate,)):
            return False
    return True
840
867
def kdecomments(self, str1, str2):
    """checks to ensure that no KDE style comments appear in the translation

    @return: False when str2 starts with "_:" or has "_:" right after a
        newline, True otherwise
    """
    if str2.startswith("_:"):
        return False
    return "\n_:" not in str2
871
def compendiumconflicts(self, str1, str2):
    """checks for Gettext compendium conflicts (#-#-#-#-#)

    @return: True when the conflict marker is absent from str2
    """
    return "#-#-#-#-#" not in str2
875
def simpleplurals(self, str1, str2):
    """checks for English style plural(s) for you to review

    Counts occurrences of "(s)" in both strings.

    @return: for a language with exactly one plural form, True only when the
        translation contains no "(s)" at all; otherwise True when source and
        translation contain the same number of them
    """
    def numberofpatterns(string, patterns):
        # total number of matches of all patterns in string
        return sum([len(re.findall(pattern, string)) for pattern in patterns])

    # raw strings: "\(" is not a valid string escape, so the backslash must be
    # passed through to the regex engine untouched
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
891
def spellcheck(self, str1, str2):
    """checks words that don't pass a spell check

    Does nothing when no target language is configured. Variables,
    accelerators and XML are filtered from both strings first. Words that
    spelling.check reports for the source (checked as "en") are not reported
    again for the translation.

    @return: True when nothing is reported for the translation
    @raise FilterFailure: carrying one message per reported word
    """
    if not self.config.targetlanguage:
        return True
    str1 = self.filterxml(self.filteraccelerators(self.filtervariables(str1)))
    str2 = self.filterxml(self.filteraccelerators(self.filtervariables(str2)))
    # words flagged in the source as well are not worth reporting
    ignore1 = [flagged for flagged, position, alternatives in spelling.check(str1, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage):
        # second condition: hack to ignore hyphenisation rules
        if word in ignore1 or word in suggestions:
            continue
        if isinstance(str2, unicode) or isinstance(str1, unicode):
            message = u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions))
        else:
            message = "check spelling of %s (could be %s)" % (word, " / ".join(suggestions))
        messages.append(message)
    if not messages:
        return True
    raise FilterFailure(messages)
915
def credits(self, str1, str2):
    """checks for messages containing translation credits instead of normal translations.

    @return: False when str1 is one of the configured credit_sources
    """
    return str1 not in self.config.credit_sources
919
# If the precondition filter is run and fails then the other tests listed are ignored
# Maps the name of a precondition test to the tuple of test names it guards.
preconditions = {
    "untranslated": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
        "blank", "nplurals",
    ),
    "blank": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
    ),
    "credits": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "doublespacing",
        "sentencecount", "numbers",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
    ),
    "purepunc": ("startcaps", "options"),
    "startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": (
        "accelerators", "brackets", "escapes",
        "numbers", "startpunc", "long", "variables",
        "startcaps", "sentencecount", "simplecaps",
        "doublespacing", "endpunc", "xmltags",
        "startwhitespace", "endwhitespace",
        "singlequoting", "doublequoting",
        "filepaths", "purepunc", "doublewords", "printf",
    ),
}
962
963 # code to actually run the tests (use unittest?)
964
# Checker settings applied by OpenOfficeChecker.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
        ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
        ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)]
)


class OpenOfficeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with openofficeconfig."""

    def __init__(self, **kwargs):
        """Updates the caller's checkerconfig (or a fresh CheckerConfig when
        none is given) with openofficeconfig, then initialises the
        StandardChecker with it."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
980
# Checker settings applied by MozillaChecker.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1), ("$", "$"),
        ("$", None), ("#", 1), ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"]
)


class MozillaChecker(StandardChecker):
    """StandardChecker whose configuration is updated with mozillaconfig."""

    def __init__(self, **kwargs):
        """Updates the caller's checkerconfig (or a fresh CheckerConfig when
        none is given) with mozillaconfig, then initialises the
        StandardChecker with it."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)
995
# Checker settings applied by GnomeChecker.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"]
)


class GnomeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with gnomeconfig."""

    def __init__(self, **kwargs):
        """Updates the caller's checkerconfig (or a fresh CheckerConfig when
        none is given) with gnomeconfig, then initialises the
        StandardChecker with it."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)
1010
# Checker settings applied by KdeChecker.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"]
)


class KdeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with kdeconfig."""

    def __init__(self, **kwargs):
        """Updates the caller's checkerconfig (or a fresh CheckerConfig when
        none is given) with kdeconfig, then initialises the StandardChecker
        with it."""
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
1027
# Checker settings applied by CCLicenseChecker.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])


class CCLicenseChecker(StandardChecker):
    """StandardChecker whose configuration is updated with cclicenseconfig."""

    def __init__(self, **kwargs):
        """Updates the caller's checkerconfig (or a fresh CheckerConfig when
        none is given) with cclicenseconfig, then initialises the
        StandardChecker with it."""
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
1037
# Maps a project name to the checker class used for it; "wx" shares the
# KDE checker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
}
1046
1047
1049 """The standard checks for common checks on translation units."""
1053
1057
def nplurals(self, unit):
    """Checks for the correct number of noun forms for plural translations.

    @return: True for units without plurals, and for plural units when the
        configured language has no positive nplurals value; otherwise whether
        the number of target strings equals that value
    """
    if not unit.hasplural():
        return True
    expected = self.config.lang.nplurals
    # if we don't have a valid nplurals value, don't run the test
    if expected > 0:
        return len(unit.target.strings) == expected
    return True
1066
def hassuggestion(self, unit):
    """Checks if there is at least one suggested translation for this unit.

    Suggestions come from self.suggestion_store (units with the same source)
    when one is set; otherwise, for XLIFF units, from unit.getalttrans().

    @return: False when at least one suggestion is found, True otherwise
    """
    # make sure the attribute exists on the checker from now on
    store = getattr(self, 'suggestion_store', None)
    self.suggestion_store = store
    if store:
        source = unit.source
        suggestions = [candidate for candidate in store.units if candidate.source == source]
    elif xliff and isinstance(unit, xliff.xliffunit):
        # TODO: we probably want to filter them somehow
        suggestions = unit.getalttrans()
    else:
        suggestions = []
    return not bool(suggestions)
1078
1079
# NOTE(review): the def line is missing from the listing this was taken from;
# the () default for ignorelist is an assumption - confirm.
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Builds a translation unit from the pair, runs a StandardChecker (with
    ignorelist as its excluded filters) over it and prints every failure.

    @return: whatever run_filters reported as failures
    """
    from translate.storage import base
    source = data.forceunicode(str1)
    target = data.forceunicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for testname, message in failures:
        print("failure: %s: %s\n %r\n %r" % (testname, message, source, target))
    return failures
1092
def batchruntests(pairs):
    """runs test on a batch of string pairs

    Calls runtests on every (source, target) pair and prints how many pairs
    passed out of the total.

    @param pairs: sequence of (str1, str2) tuples
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures it found, so a pair has passed only
        # when that result is empty
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
1101
if __name__ == '__main__':
    # (source, translation) sample pairs pushed through the standard checks
    # when the module is run as a script
    testset = [
        (r"simple", r"somple"),
        (r"\this equals \that", r"does \this equal \that?"),
        (r"this \'equals\' that", r"this 'equals' that"),
        (r" start and end! they must match.", r"start and end! they must match."),
        (r"check for matching %variables marked like %this", r"%this %variable is marked"),
        (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
        (r"check for mismatching %variables% too", r"how many %variable% are marked"),
        (r"%% %%", r"%%"),
        (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
        (r"simple lowercase", r"it is all lowercase"),
        (r"simple lowercase", r"It Is All Lowercase"),
        (r"Simple First Letter Capitals", r"First Letters"),
        (r"SIMPLE CAPITALS", r"First Letters"),
        (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
        (r"forgot to translate", r" "),
    ]
    batchruntests(testset)
1120
| Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Wed Mar 26 12:49:36 2008 | http://epydoc.sourceforge.net |