Path: blob/master/lessons/lesson_14/News Corpus Binomial Naive Bayes - (done).ipynb
1904 views
Kernel: Python 3
Naive Bayes Test Case on Cogito Corpus
In [1]:
In [2]:
Out[2]:
['Business', 'Corruption', 'Finance']
In [3]:
Out[3]:
'assets/dataset/news_corpus/Business'
In [4]:
Out[4]:
In [5]:
Out[5]:
(300, 2)
In [6]:
In [7]:
In [8]:
Out[8]:
<180x44938 sparse matrix of type '<class 'numpy.int64'>'
with 83605 stored elements in Compressed Sparse Row format>
In [9]:
Out[9]:
matrix([[0, 0, 1, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]], dtype=int64)
In [10]:
Out[10]:
['28 34',
'28 744',
'28 84',
'28 addition',
'28 aims',
'28 cent',
'28 cents',
'28 company',
'28 competitive',
'28 deliberating']
In [11]:
Out[11]:
0.975
In [12]:
Out[12]:
['Business' 'Corruption' 'Finance']
[[41 0 1]
[ 1 40 0]
[ 1 0 36]]
In [13]:
Out[13]:
Top Ten Features per Class:
Business : said, year, company, sales, quarter, profit, earnings, million, billion, new
Corruption : mr, said, zhou, corruption, party, china, mr zhou, state, charges, political
Finance : inflation, prices, year, said, bank, rate, cent, price, central, february
In [ ]: