Path: blob/master/notebooks/book2/02/zipfs_law_plot.ipynb
1193 views
Kernel: NewPython
Application of Zipf's law on H. G. Wells' The Time Machine dataset
In [1]:
In [2]:
Out[2]:
/home/madhav/.local/lib/python3.8/site-packages/probml_utils/plotting.py:26: UserWarning: LATEXIFY environment variable not set, not latexifying
warnings.warn("LATEXIFY environment variable not set, not latexifying")
In [3]:
Out[3]:
['the', 'time', 'machine', 'by', 'h', 'g', 'wells']
[]
['i']
[]
['the', 'time', 'traveller', 'for', 'so', 'it', 'will', 'be', 'convenient', 'to', 'speak', 'of', 'him', 'was', 'expounding', 'a', 'recondite', 'matter', 'to', 'us', 'his', 'grey', 'eyes', 'shone', 'and', 'twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the', 'fire', 'burned', 'brightly', 'and', 'the', 'soft', 'radiance', 'of', 'the', 'incandescent', 'lights', 'in', 'the', 'lilies', 'of', 'silver', 'caught', 'the', 'bubbles', 'that', 'flashed', 'and', 'passed', 'in', 'our', 'glasses', 'our', 'chairs', 'being', 'his', 'patents', 'embraced', 'and', 'caressed', 'us', 'rather', 'than', 'submitted', 'to', 'be', 'sat', 'upon', 'and', 'there', 'was', 'that', 'luxurious', 'after', 'dinner', 'atmosphere', 'when', 'thought', 'roams', 'gracefully', 'free', 'of', 'the', 'trammels', 'of', 'precision', 'and', 'he', 'put', 'it', 'to', 'us', 'in', 'this', 'way', 'marking', 'the', 'points', 'with', 'a', 'lean', 'forefinger', 'as', 'we', 'sat', 'and', 'lazily', 'admired', 'his', 'earnestness', 'over', 'this', 'new', 'paradox', 'as', 'we', 'thought', 'it', 'and', 'his', 'fecundity']
[]
['you', 'must', 'follow', 'me', 'carefully', 'i', 'shall', 'have', 'to', 'controvert', 'one', 'or', 'two', 'ideas', 'that', 'are', 'almost', 'universally', 'accepted', 'the', 'geometry', 'for', 'instance', 'they', 'taught', 'you', 'at', 'school', 'is', 'founded', 'on', 'a', 'misconception']
[]
['is', 'not', 'that', 'rather', 'a', 'large', 'thing', 'to', 'expect', 'us', 'to', 'begin', 'upon', 'said', 'filby', 'an', 'argumentative', 'person', 'with', 'red', 'hair']
[]
"\n['the', 'time', 'machine', 'by', 'h', 'g', 'wells']\n[]\n['i']\n[]\n['the', 'time', 'traveller', 'for', 'so', 'it', 'will', 'be', 'convenient', 'to', 'speak', 'of', 'him', 'was', 'expounding', 'a', 'recondite', 'matter', 'to', 'us', 'his', 'grey', 'eyes', 'shone', 'and', 'twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the', 'fire', 'burned', 'brightly', 'and', 'the', 'soft', 'radiance', 'of', 'the', 'incandescent', 'lights', 'in', 'the', 'lilies', 'of', 'silver', 'caught', 'the', 'bubbles', 'that', 'flashed', 'and', 'passed', 'in', 'our', 'glasses', 'our', 'chairs', 'being', 'his', 'patents', 'embraced', 'and', 'caressed', 'us', 'rather', 'than', 'submitted', 'to', 'be', 'sat', 'upon', 'and', 'there', 'was', 'that', 'luxurious', 'after', 'dinner', 'atmosphere', 'when', 'thought', 'roams', 'gracefully', 'free', 'of', 'the', 'trammels', 'of', 'precision', 'and', 'he', 'put', 'it', 'to', 'us', 'in', 'this', 'way', 'marking', 'the', 'points', 'with', 'a', 'lean', 'forefinger', 'as', 'we', 'sat', 'and', 'lazily', 'admired', 'his', 'earnestness', 'over', 'this', 'new', 'paradox', 'as', 'we', 'thought', 'it', 'and', 'his', 'fecundity']\n[]\n['you', 'must', 'follow', 'me', 'carefully', 'i', 'shall', 'have', 'to', 'controvert', 'one', 'or', 'two', 'ideas', 'that', 'are', 'almost', 'universally', 'accepted', 'the', 'geometry', 'for', 'instance', 'they', 'taught', 'you', 'at', 'school', 'is', 'founded', 'on', 'a', 'misconception']\n[]\n['is', 'not', 'that', 'rather', 'a', 'large', 'thing', 'to', 'expect', 'us', 'to', 'begin', 'upon', 'said', 'filby', 'an', 'argumentative', 'person', 'with', 'red', 'hair']\n[]\n"
In [4]:
Out[4]:
WARNING:absl:No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
First 10 unigrams
['the', 'time', 'machine', 'by', 'h', 'g', 'wells', 'i', 'the', 'time']
First 10 bigrams
[('the', 'time'), ('time', 'machine'), ('machine', 'by'), ('by', 'h'), ('h', 'g'), ('g', 'wells'), ('wells', 'i'), ('i', 'the'), ('the', 'time'), ('time', 'traveller')]
First 10 trigrams
[('the', 'time', 'machine'), ('time', 'machine', 'by'), ('machine', 'by', 'h'), ('by', 'h', 'g'), ('h', 'g', 'wells'), ('g', 'wells', 'i'), ('wells', 'i', 'the'), ('i', 'the', 'time'), ('the', 'time', 'traveller'), ('time', 'traveller', 'for')]
Most common unigrams
[('the', 2261), ('i', 1267), ('and', 1245), ('of', 1155), ('a', 816), ('to', 695), ('was', 552), ('in', 541), ('that', 443), ('my', 440)]
Most common bigrams
[(('of', 'the'), 309), (('in', 'the'), 169), (('i', 'had'), 130), (('i', 'was'), 112), (('and', 'the'), 109), (('the', 'time'), 102), (('it', 'was'), 99), (('to', 'the'), 85), (('as', 'i'), 78), (('of', 'a'), 73)]
Most common trigrams
[(('the', 'time', 'traveller'), 59), (('the', 'time', 'machine'), 30), (('the', 'medical', 'man'), 24), (('it', 'seemed', 'to'), 16), (('it', 'was', 'a'), 15), (('here', 'and', 'there'), 15), (('seemed', 'to', 'me'), 14), (('i', 'did', 'not'), 14), (('i', 'saw', 'the'), 13), (('i', 'began', 'to'), 13)]
"\nFirst 10 unigrams\n ['the', 'time', 'machine', 'by', 'h', 'g', 'wells', 'i', 'the', 'time']\nFirst 10 bigrams\n [('the', 'time'), ('time', 'machine'), ('machine', 'by'), ('by', 'h'), ('h', 'g'), ('g', 'wells'), ('wells', 'i'), ('i', 'the'), ('the', 'time'), ('time', 'traveller')]\nFirst 10 trigrams\n [('the', 'time', 'machine'), ('time', 'machine', 'by'), ('machine', 'by', 'h'), ('by', 'h', 'g'), ('h', 'g', 'wells'), ('g', 'wells', 'i'), ('wells', 'i', 'the'), ('i', 'the', 'time'), ('the', 'time', 'traveller'), ('time', 'traveller', 'for')]\nMost common unigrams\n [('the', 2261), ('i', 1267), ('and', 1245), ('of', 1155), ('a', 816), ('to', 695), ('was', 552), ('in', 541), ('that', 443), ('my', 440)]\nMost common bigrams\n [(('of', 'the'), 309), (('in', 'the'), 169), (('i', 'had'), 130), (('i', 'was'), 112), (('and', 'the'), 109), (('the', 'time'), 102), (('it', 'was'), 99), (('to', 'the'), 85), (('as', 'i'), 78), (('of', 'a'), 73)]\nMost common trigrams\n [(('the', 'time', 'traveller'), 59), (('the', 'time', 'machine'), 30), (('the', 'medical', 'man'), 24), (('it', 'seemed', 'to'), 16), (('it', 'was', 'a'), 15), (('here', 'and', 'there'), 15), (('seemed', 'to', 'me'), 14), (('i', 'did', 'not'), 14), (('i', 'saw', 'the'), 13), (('i', 'began', 'to'), 13)]\n"
In [5]:
Out[5]:
/home/madhav/.local/lib/python3.8/site-packages/probml_utils/plotting.py:79: UserWarning: set FIG_DIR environment variable to save figures
warnings.warn("set FIG_DIR environment variable to save figures")
In [6]:
Out[6]: