Path: blob/master/notebooks/book2/02/bigram_hinton_diagram.ipynb
1193 views
Kernel: Python [conda env:py3713]
In [ ]:
In [2]:
Out[2]:
['the', 'time', 'machine', 'by', 'h', 'g', 'wells']
[]
['i']
[]
['the', 'time', 'traveller', 'for', 'so', 'it', 'will', 'be', 'convenient', 'to', 'speak', 'of', 'him', 'was', 'expounding', 'a', 'recondite', 'matter', 'to', 'us', 'his', 'grey', 'eyes', 'shone', 'and', 'twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the', 'fire', 'burned', 'brightly', 'and', 'the', 'soft', 'radiance', 'of', 'the', 'incandescent', 'lights', 'in', 'the', 'lilies', 'of', 'silver', 'caught', 'the', 'bubbles', 'that', 'flashed', 'and', 'passed', 'in', 'our', 'glasses', 'our', 'chairs', 'being', 'his', 'patents', 'embraced', 'and', 'caressed', 'us', 'rather', 'than', 'submitted', 'to', 'be', 'sat', 'upon', 'and', 'there', 'was', 'that', 'luxurious', 'after', 'dinner', 'atmosphere', 'when', 'thought', 'roams', 'gracefully', 'free', 'of', 'the', 'trammels', 'of', 'precision', 'and', 'he', 'put', 'it', 'to', 'us', 'in', 'this', 'way', 'marking', 'the', 'points', 'with', 'a', 'lean', 'forefinger', 'as', 'we', 'sat', 'and', 'lazily', 'admired', 'his', 'earnestness', 'over', 'this', 'new', 'paradox', 'as', 'we', 'thought', 'it', 'and', 'his', 'fecundity']
[]
['you', 'must', 'follow', 'me', 'carefully', 'i', 'shall', 'have', 'to', 'controvert', 'one', 'or', 'two', 'ideas', 'that', 'are', 'almost', 'universally', 'accepted', 'the', 'geometry', 'for', 'instance', 'they', 'taught', 'you', 'at', 'school', 'is', 'founded', 'on', 'a', 'misconception']
[]
['is', 'not', 'that', 'rather', 'a', 'large', 'thing', 'to', 'expect', 'us', 'to', 'begin', 'upon', 'said', 'filby', 'an', 'argumentative', 'person', 'with', 'red', 'hair']
[]
First 10 unigrams
the time m
First 10 bigrams
[('t', 'h'), ('h', 'e'), ('e', ' '), (' ', 't'), ('t', 'i'), ('i', 'm'), ('m', 'e'), ('e', ' '), (' ', 'm'), ('m', 'a')]
First 10 trigrams
[('t', 'h', 'e'), ('h', 'e', ' '), ('e', ' ', 't'), (' ', 't', 'i'), ('t', 'i', 'm'), ('i', 'm', 'e'), ('m', 'e', ' '), ('e', ' ', 'm'), (' ', 'm', 'a'), ('m', 'a', 'c')]
Most common unigrams
[(' ', 32453), ('e', 17838), ('t', 13515), ('a', 11704), ('i', 10138), ('n', 9917), ('o', 9758), ('s', 8486), ('h', 8257), ('r', 7674)]
Most common bigrams
[(('e', ' '), 6575), ((' ', 't'), 5506), (('t', 'h'), 4602), (('d', ' '), 4117), (('h', 'e'), 3859), ((' ', 'a'), 3818), (('t', ' '), 3511), (('s', ' '), 3147), (('i', 'n'), 2935), ((' ', 'i'), 2895)]
Most common trigrams
[((' ', 't', 'h'), 3888), (('t', 'h', 'e'), 3126), (('h', 'e', ' '), 2432), (('n', 'd', ' '), 1602), ((' ', 'a', 'n'), 1455), (('e', 'd', ' '), 1446), (('a', 'n', 'd'), 1398), ((' ', 'i', ' '), 1212), ((' ', 'o', 'f'), 1186), (('o', 'f', ' '), 1158)]
In [8]:
Out[8]:
In [9]:
Out[9]:
In [ ]: