HEX
Server: Apache
System: Linux server2.voipitup.com.au 4.18.0-553.109.1.lve.el8.x86_64 #1 SMP Thu Mar 5 20:23:46 UTC 2026 x86_64
User: posscale (1027)
PHP: 8.2.30
Disabled: exec,passthru,shell_exec,system
Upload Files
File: //opt/saltstack/salt/lib/python3.10/site-packages/charset_normalizer/__pycache__/md.cpython-310.pyc
o

�N�g�H�
@sfddlmZddlmZddlmZmZddlmZm	Z	m
Z
ddlmZm
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZGdd�d�ZGd	d
�d
e�ZGdd�de�ZGd
d�de�ZGdd�de�Z Gdd�de�Z!Gdd�de�Z"Gdd�de�Z#Gdd�de�Z$edd�dee%dee%de&fdd��Z'ed d�	"d)d#e%d$e(d%e&de(fd&d'��Z)d(S)*�)�	lru_cache)�	getLogger)�List�Optional�)�COMMON_SAFE_ASCII_CHARACTERS�TRACE�UNICODE_SECONDARY_RANGE_KEYWORD)�is_accentuated�is_ascii�is_case_variable�is_cjk�is_emoticon�	is_hangul�is_hiragana�is_katakana�is_latin�is_punctuation�is_separator�	is_symbol�is_thai�is_unprintable�
remove_accent�
unicode_rangec@sPeZdZdZdedefdd�Zdeddfdd�Zd
d	d
�Ze	de
fdd��ZdS)�MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    �	character�returncC�t�)z@
        Determine if given character should be fed in.
        ��NotImplementedError��selfr�r"�I/opt/saltstack/salt/lib/python3.10/site-packages/charset_normalizer/md.py�eligible$�zMessDetectorPlugin.eligibleNcCr)z�
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        rr r"r"r#�feed*szMessDetectorPlugin.feedcCr)zB
        Permit to reset the plugin to the initial state.
        r�r!r"r"r#�reset1r%zMessDetectorPlugin.resetcCr)z�
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        rr'r"r"r#�ratio7szMessDetectorPlugin.ratio�rN)�__name__�
__module__�__qualname__�__doc__�str�boolr$r&r(�property�floatr)r"r"r"r#rs
rc@�VeZdZddd�Zdedefdd�Zdeddfdd	�Zdd
d�Ze	de
fdd
��ZdS)� TooManySymbolOrPunctuationPluginrNcCs"d|_d|_d|_d|_d|_dS)NrF)�_punctuation_count�
_symbol_count�_character_count�_last_printable_charZ_frenzy_symbol_in_wordr'r"r"r#�__init__As

z)TooManySymbolOrPunctuationPlugin.__init__rcC�|��S�N��isprintabler r"r"r#r$I�z)TooManySymbolOrPunctuationPlugin.eligiblecCsp|jd7_||jkr3|tvr3t|�r|jd7_n|��dur3t|�r3t|�dur3|jd7_||_dS)NrF�)	r7r8rrr5�isdigitrrr6r r"r"r#r&Ls
�
z%TooManySymbolOrPunctuationPlugin.feedcCsd|_d|_d|_dS�Nr)r5r7r6r'r"r"r#r(^�
z&TooManySymbolOrPunctuationPlugin.resetcCs0|jdkrdS|j|j|j}|dkr|SdS)Nr��333333�?)r7r5r6)r!Zratio_of_punctuationr"r"r#r)cs

�z&TooManySymbolOrPunctuationPlugin.ratior*�r+r,r-r9r/r0r$r&r(r1r2r)r"r"r"r#r4@s

r4c@r3)�TooManyAccentuatedPluginrNcC�d|_d|_dSrA�r7�_accentuated_countr'r"r"r#r9p�
z!TooManyAccentuatedPlugin.__init__rcCr:r;)�isalphar r"r"r#r$tr>z!TooManyAccentuatedPlugin.eligiblecCs,|jd7_t|�r|jd7_dSdS�Nr)r7r
rIr r"r"r#r&ws�zTooManyAccentuatedPlugin.feedcCrGrArHr'r"r"r#r(}rJzTooManyAccentuatedPlugin.resetcCs4|jdks
|jdkrdS|j|j}|dkr|SdS)Nr�rCgffffff�?rH)r!Zratio_of_accentuationr"r"r#r)�szTooManyAccentuatedPlugin.ratior*rEr"r"r"r#rFos

rFc@r3)�UnprintablePluginrNcCrGrA)�_unprintable_countr7r'r"r"r#r9�rJzUnprintablePlugin.__init__rcC�dS�NTr"r r"r"r#r$��zUnprintablePlugin.eligiblecCs(t|�r|jd7_|jd7_dSrL)rrOr7r r"r"r#r&�szUnprintablePlugin.feedcCs
d|_dSrA)rOr'r"r"r#r(�s
zUnprintablePlugin.resetcC�|jdkrdS|jd|jS)NrrCrM)r7rOr'r"r"r#r)��
zUnprintablePlugin.ratior*rEr"r"r"r#rN�s

rNc@r3)�SuspiciousDuplicateAccentPluginrNcC�d|_d|_d|_dSrA��_successive_countr7�_last_latin_characterr'r"r"r#r9�s
z(SuspiciousDuplicateAccentPlugin.__init__rcCs|��ot|�Sr;)rKrr r"r"r#r$�sz(SuspiciousDuplicateAccentPlugin.eligiblecCst|jd7_|jdur5t|�r5t|j�r5|��r%|j��r%|jd7_t|�t|j�kr5|jd7_||_dSrL)r7rYr
�isupperrXrr r"r"r#r&�s
��
z$SuspiciousDuplicateAccentPlugin.feedcCrVrArWr'r"r"r#r(�rBz%SuspiciousDuplicateAccentPlugin.resetcCrS)NrrCr?)r7rXr'r"r"r#r)�rTz%SuspiciousDuplicateAccentPlugin.ratior*rEr"r"r"r#rU�s

rUc@r3)�SuspiciousRangerNcCrVrA)�"_suspicious_successive_range_countr7�_last_printable_seenr'r"r"r#r9�rBzSuspiciousRange.__init__rcCr:r;r<r r"r"r#r$�r>zSuspiciousRange.eligiblecCsx|jd7_|��st|�s|tvrd|_dS|jdur"||_dSt|j�}t|�}t||�r7|jd7_||_dSrL)r7�isspacerrr]r� is_suspiciously_successive_ranger\)r!r�unicode_range_a�unicode_range_br"r"r#r&�s ��



zSuspiciousRange.feedcCrVrA)r7r\r]r'r"r"r#r(�rBzSuspiciousRange.resetcCs.|jdkrdS|jd|j}|dkrdS|S)NrrCr?g�������?)r7r\)r!Zratio_of_suspicious_range_usager"r"r#r)�s
�zSuspiciousRange.ratior*rEr"r"r"r#r[�s

r[c@r3)�SuperWeirdWordPluginrNcCs:d|_d|_d|_d|_d|_d|_d|_d|_d|_dS)NrF�)	�_word_count�_bad_word_count�_foreign_long_count�_is_current_word_bad�_foreign_long_watchr7�_bad_character_count�_buffer�_buffer_accent_countr'r"r"r#r9�s
zSuperWeirdWordPlugin.__init__rcCrPrQr"r r"r"r#r$	rRzSuperWeirdWordPlugin.eligiblecCs|��rH|j|7_t|�r|jd7_|jdurFt|�dus%t|�rFt|�durFt|�durFt|�durFt	|�durFt
|�durFd|_dS|jsMdS|��sYt|�sYt
|�r�|jr�|jd7_t|j�}|j|7_|dkr�|j|dkr}d|_t|jd�r�|jd��r�|jd7_d|_|dkr�|jr�dd	�t|jtd
|��D�}d}|r�t|�|dkr�d}|s�|jd7_d|_|jr�|jd7_|jt|j�7_d|_d|_d|_d
|_dS|d
v�r|��du�rt|��rd|_|j|7_dSdSdSdS)NrFT�g�(\���?����cSsg|]
\}}|��r|�qSr")rZ)�.0�c�ir"r"r#�
<listcomp>/s��z-SuperWeirdWordPlugin.feed.<locals>.<listcomp>rrDrc>�_�<�>�~�|�=�-)rKrjr
rkrhrr
rrrrr^rrrd�lenr7rgrZrf�zip�rangererir@r)r!rZ
buffer_lengthZcamel_case_dstZprobable_camel_casedr"r"r#r&sr
����
�

��zSuperWeirdWordPlugin.feedcCs4d|_d|_d|_d|_d|_d|_d|_d|_dS)NrcFr)rjrgrhrerdr7rirfr'r"r"r#r(Ms
zSuperWeirdWordPlugin.resetcCs$|jdkr|jdkrdS|j|jS)N�
rrC)rdrfrir7r'r"r"r#r)WszSuperWeirdWordPlugin.ratior*rEr"r"r"r#rb�s

A
rbc@sZeZdZdZddd�Zdedefdd�Zdeddfd	d
�Zddd�Z	e
defd
d��ZdS)�CjkInvalidStopPluginu�
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    rNcCrGrA��_wrong_stop_count�_cjk_character_countr'r"r"r#r9erJzCjkInvalidStopPlugin.__init__rcCrPrQr"r r"r"r#r$irRzCjkInvalidStopPlugin.eligiblecCs8|dvr
|jd7_dSt|�r|jd7_dSdS)N>u丄u丅r)r�r
r�r r"r"r#r&ls�zCjkInvalidStopPlugin.feedcCrGrArr'r"r"r#r(srJzCjkInvalidStopPlugin.resetcCs|jdkrdS|j|jS)N�rC)r�r�r'r"r"r#r)ws
zCjkInvalidStopPlugin.ratior*)
r+r,r-r.r9r/r0r$r&r(r1r2r)r"r"r"r#r~_s

r~c@r3)�ArchaicUpperLowerPluginrNcCs.d|_d|_d|_d|_d|_d|_d|_dS)NFrT)�_buf�_character_count_since_last_sep�_successive_upper_lower_count�#_successive_upper_lower_count_finalr7�_last_alpha_seen�_current_ascii_onlyr'r"r"r#r9s
z ArchaicUpperLowerPlugin.__init__rcCrPrQr"r r"r"r#r$�rRz ArchaicUpperLowerPlugin.eligiblecCs$|��ot|�}|du}|rC|jdkrC|jdkr+|��dur+|jdur+|j|j7_d|_d|_d|_d|_|j	d7_	d|_dS|jdurQt
|�durQd|_|jdur|��r_|j��sh|��r||j��r||jdurx|jd7_d|_nd|_nd|_|j	d7_	|jd7_||_dS)NFr�@rTr?)
rKrr�r@r�r�r�r�r�r7rrZ�islower)r!rZis_concernedZ	chunk_sepr"r"r#r&�s@

�
��

zArchaicUpperLowerPlugin.feedcCs.d|_d|_d|_d|_d|_d|_d|_dS)NrFT)r7r�r�r�r�r�r�r'r"r"r#r(�s
zArchaicUpperLowerPlugin.resetcCs|jdkrdS|j|jS)NrrC)r7r�r'r"r"r#r)�s
zArchaicUpperLowerPlugin.ratior*rEr"r"r"r#r�~s


*	r��)�maxsizer`rarcCsb|dus|dur
dS||krdSd|vrd|vrdSd|vs"d|vr$dSd|vs,d|vr6d|vs4d|vr6dS|�d�|�d�}}|D]}|tvrJqC||vrQdSqC|dv|dv}}|s_|rid	|vsgd	|vridS|ro|rodSd
|vswd
|vr�d	|vsd	|vr�dS|dks�|dkr�dSd	|vs�d	|vs�|dvr�|dvr�d
|vs�d
|vr�dSd|vs�d|vr�dSdS)za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFZLatinZ	EmoticonsZ	Combining� )�Hiragana�KatakanaZCJKZHangulzBasic Latin)r�r�ZPunctuationZForms)�splitr	)r`raZkeywords_range_aZkeywords_range_b�elZrange_a_jp_charsZrange_b_jp_charsr"r"r#r_�sZ�����r_i皙�����?F�decoded_sequence�maximum_threshold�debugc	CsRdd�t��D�}t|�d}d}|dkrd}n	|dkrd}nd	}t|d
t|��D]2\}}|D]}	|	�|�r<|	�|�q0|dkrG||dksM||dkr\tdd
�|D��}||kr\nq*|r�td�}
|
�	t
d|�d|�d|���t|�dkr�|
�	t
d|dd����|
�	t
d|dd����|D]}|
�	t
|j�d|j���q�t
|d�S)zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    cSsg|]}|��qSr"r")roZmd_classr"r"r#rrs�zmess_ratio.<locals>.<listcomp>rrCi� r�r���
rcss�|]}|jVqdSr;)r))ro�dtr"r"r#�	<genexpr>0s�zmess_ratio.<locals>.<genexpr>Zcharset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=r�zStarting with: Nz
Ending with: i�z: �)r�__subclasses__rzr{r|r$r&�sumr�logr�	__class__r)�round)r�r�r�Z	detectors�lengthZmean_mess_ratioZ!intermediary_mean_mess_ratio_calcr�indexZdetector�loggerr�r"r"r#�
mess_ratiosN�

������
r�N)r�F)*�	functoolsr�loggingr�typingrrZconstantrrr	�utilsr
rrr
rrrrrrrrrrrrrr4rFrNrUr[rbr~r�r/r0r_r2r�r"r"r"r#�<module>sBH"/%4eL���F�����