Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Aniket025
GitHub Repository: Aniket025/Medical-Prescription-OCR
Path: blob/master/Model-5/spell.pyc
427 views
�
}�Zc@seddlZddlZdZdadad�Zd�Zd�Zd�Ze	d�Z
e	d	�Zed
�Z
d�Zedkrad
GHejd�ej�ZiZeed�ej�eZdGHdeGHdGHdGHdGHdGHxter]ed�Zee�dkrdGHPnej�Ze
eed�GHej�eZdGHdeGHdGHq�WndS(i����NiicCs�g}|g}x�tt�D]�}g}x�|D]�}t|�dkr/xktt|��D]T}|| ||d}||kr�|j|�n||krZ|j|�qZqZWq/q/W|}qW|S(sPgiven a word, derive strings with up to max_edit_distance characters
	   deletedi(trangetmax_edit_distancetlentappend(twtdeletestqueuetdt
temp_queuetwordtctword_minus_c((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pytget_deletes_lists	

cCs�t}||kr9||d||ddf||<n%gdf||<ttt|��a||ddkr�t}t|�}xH|D]=}||kr�||dj|�q�|gdf||<q�Wn|S(s0add word and its derived deletions to dictionaryii(tFalsetmaxtlongest_word_lengthRtTrueRR(t
dictionaryRtnew_real_word_addedRtitem((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pytcreate_dictionary_entrys'
c	Cs�d}d}t|��l}dGHx]|D]U}tjd|j��}x4|D],}|d7}t||�rL|d7}qLqLWq'WWdQX|S(NisCreating dictionary...s[a-z]+i(topentretfindalltlowerR(Rtfnamettotal_word_counttunique_word_counttfiletlinetwordsR	((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pytcreate_dictionary?s


c
Csvd}tdt|�d�dg}x;tt|��D]'}||dgt|�|dg}}}x�tt|��D]�}||d}||dd}||d||||k}	t|||	�||<|dkr~|dkr~||||dkr~||d||kr~||||kr~t||||dd�||<q~q~Wq9W|t|�dS(s,Calculate the Damerau-Levenshtein distance between sequences.
	This method has not been modified from the original.
	Source: http://mwh.geek.nz/2009/04/26/python-damerau-levenshtein-distance/

	This distance is the number of additions, deletions, substitutions,
	and transpositions needed to transform the first sequence into the
	second. Although generally used with strings, any sequences of
	comparable objects will work.
	Transpositions are exchanges of *consecutive* characters; all other
	operations are self-explanatory.
	This implementation is O(N*M) time and O(M) space, for N and M the
	lengths of the two sequences.
	>>> dameraulevenshtein('ba', 'abc')
	2
	>>> dameraulevenshtein('fee', 'deed')
	2
	It works with arbitrary sequences too:
	>>> dameraulevenshtein('abcd', ['b', 'a', 'c', 'd', 'e'])
	2
	iiiN(tNoneRRtxrangetmin(
tseq1tseq2toneagotthisrowtxttwoagotytdelcosttaddcosttsubcost((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pytdameraulevenshteinPs , 0,.c
s9t|�ttkr(|s$dGHngSi}td��|g}i}xlt|�dkr�|d}|d}tdkr�t|�dkr�t|�t|��kr�Pn||kr�||kr�||ddkr�t|�t|�ks�t�||dt|�t|�f||<tdkrJt|�t|�krJPq�t|�t|��kr�t|�t|��q�nx\||dD]I}||kr�t|�t|�ks�t�t|�t|�ks�t�t|�t|�kr"||ks	t�t|�t|�}n||ks4t�t||�}tdkr^|�kr^nN|tkr�||ks|t�||d|f||<|�kr�|�q�ntdkr��fd�|j�D�}q�q�q�Wnt|�t|�ks�t�tdkr*t|�t|��kr*qLt|�t|�tkrLt|�dkrLxYtt|��D]B}	||	 ||	d}
|
|krk|j	|
�d||
<qkqkWqLqLW|r�tdkr�dt|�GHdtGHn|j�}t|d	d
��}tdkr1t|�dkr)gS|dS|SdS(sQreturn list of suggested corrections for potentially incorrectly
	   spelled words3no items in dictionary within maximum edit distancetinfiiics/i|]%\}}|d�kr||�qS(i((t.0tktv(tmin_suggest_len(sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pys
<dictcomp>�s	s"number of possible corrections: %is!  edit distance for deletions: %itkeycSs|\}\}}||fS(N((R/ttermtfreqtdist((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pyt<lambda>�sN(RRRtfloattverbosetAssertionErrorR-titemsRRR tsorted(
Rtstringtsilenttsuggest_dictRtq_dictionarytq_itemtsc_itemt	item_distR
Rtas_listtoutlist((R2sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pytget_suggestions{sz	

$)(.
cCs'yt|||�dSWn|SXdS(Ni(RF(RtsR>((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pyt	best_wordsc
Cst|d��}t|���}d}d}d}x�t|�D]�\}	}
tjd|
j��}x�|D]�}|d7}|dj�r�t||�}
t|
�dkr�|j|�|d7}q|j|
d�|
d|kr|d7}qqk|j|�qkWq@WWdQXWdQXdS(NRis[a-z]+|[^a-z]+i(	Rt	enumerateRRRtisalphaRFRtwrite(RRtoutfnamet	printlisttoutfileRtdoc_word_counttcorrected_word_counttunknown_word_counttiRt	doc_wordstdoc_wordt
suggestion((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pytcorrect_documents$



cCst||d�dS(Ns
output.txt(RV(Rtword_in((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pyt
spellcheck'st__main__sPlease wait...is./total_medicine_data.txts-----s%.2f seconds to runt sWord corrections---------------s)Enter your file path (or enter to exit): tgoodbyes
output.txts%.5f seconds to run(RttimeRR9RRRRR-R
RFRHRRVRXt__name__tsleept
start_timeRtrun_timet	raw_inputRWR(((sO/Users/Piyush_Jena/Documents/Opensoft/spell_correction/test_software_2/spell.pyt<module>sH		 		+�