Path: blob/master/thirdparty/chardet/charsetgroupprober.py
2992 views
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .enums import ProbingState
from .charsetprober import CharSetProber


class CharSetGroupProber(CharSetProber):
    """Prober that delegates to a list of child probers.

    Every chunk of input is forwarded to each child that is still active.
    The group tracks how many children remain active and which child is
    currently the best guess; ``charset_name``, ``language`` and
    ``get_confidence`` report on that child.

    ``self.probers`` starts out empty; it is presumably filled in by
    subclasses (entries may be falsy placeholders, which are skipped).
    """

    def __init__(self, lang_filter=None):
        """Create an empty group.

        :param lang_filter: forwarded unchanged to ``CharSetProber.__init__``.
        """
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        self._active_num = 0  # number of children still marked active
        self.probers = []  # child probers consulted by feed()
        self._best_guess_prober = None  # child currently considered the winner

    def reset(self):
        """Reset this prober and re-activate every non-empty child prober."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        """Charset name reported by the best-guess child, or ``None``.

        If no best guess is recorded yet, ``get_confidence()`` is called
        first so that one can be selected.
        """
        if not self._best_guess_prober:
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        """Language reported by the best-guess child, or ``None``.

        If no best guess is recorded yet, ``get_confidence()`` is called
        first so that one can be selected.
        """
        if not self._best_guess_prober:
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Forward ``byte_str`` to every active child prober.

        Stops at the first child that reports ``ProbingState.FOUND_IT``;
        that child becomes the best guess and the group itself switches to
        ``FOUND_IT``.  A child reporting ``ProbingState.NOT_ME`` is
        deactivated; once no active children remain the group switches to
        ``NOT_ME``.

        :param byte_str: data chunk handed to each child's ``feed``.
        :returns: ``self.state`` after processing the chunk.
        """
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                continue
            state = prober.feed(byte_str)
            # A falsy state is treated as "no decision yet" (presumably
            # ProbingState.DETECTING -- verify against .enums).
            if not state:
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                # Record the positive result on the group itself.  Without
                # this the FOUND_IT branch of get_confidence() is never
                # taken and a later get_confidence() call would discard
                # this prober and re-rank the children by confidence.
                self._state = ProbingState.FOUND_IT
                return self.state
            elif state == ProbingState.NOT_ME:
                prober.active = False
                self._active_num -= 1
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the group's confidence and refresh the best-guess child.

        :returns: ``0.99`` when the group state is ``FOUND_IT``, ``0.01``
            when it is ``NOT_ME``; otherwise the highest confidence among
            the active children (``0.0`` if none reports a confidence
            above zero).  In the last case ``_best_guess_prober`` is
            re-selected as the child with that highest confidence.
        """
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        elif state == ProbingState.NOT_ME:
            return 0.01
        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s',
                              prober.charset_name, prober.language, conf)
            # Strict comparison: on a tie the earlier prober is kept.
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf