-- Source: GitHub repository REMitchell/python-crawling
-- Path: blob/master/3-Advanced-Crawlers/mysql-optional/crawler.sql
-- phpMyAdmin SQL Dump
-- version 4.2.5
-- http://www.phpmyadmin.net
--
-- Host: localhost:8889
-- Generation Time: Oct 17, 2015 at 05:33 PM
-- Server version: 5.5.38
-- PHP Version: 5.5.14

-- Session settings for the import. The literals are single-quoted so they stay
-- string literals even if the connecting session has ANSI_QUOTES enabled.
-- NO_AUTO_VALUE_ON_ZERO: an explicit 0 written to an AUTO_INCREMENT column is
-- stored as 0 instead of triggering generation of the next sequence value.
SET SQL_MODE = 'NO_AUTO_VALUE_ON_ZERO';
-- Use UTC as the session time zone.
SET time_zone = '+00:00';

--
-- Database: `articleCrawler`
--

-- --------------------------------------------------------

--
-- Table structure for table `content`
--

-- Stored page content: a title, a body and (optionally) the page URL.
-- NOTE(review): `topicId` and `siteId` presumably hold `topics`.`id` and
-- `sites`.`id`; no foreign key is declared here, so confirm against the crawler.
-- The primary key and the AUTO_INCREMENT attribute on `id` are added by the
-- ALTER TABLE statements further down in this dump.
CREATE TABLE `content` (
  `id` int(11) NOT NULL,
  `topicId` int(11) NOT NULL,
  `siteId` int(11) NOT NULL,
  `title` varchar(1000) NOT NULL,
  `body` varchar(10000) NOT NULL,
  `url` varchar(300) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=28 ;

-- --------------------------------------------------------

--
-- Table structure for table `sites`
--

-- Per-site crawl configuration.
-- Going by the seed rows inserted at the end of this dump:
--   `url` / `searchUrl`    site root and the search URL prefix;
--   `resultListing`, `resultUrl`, `pageTitle`, `pageBody`
--                          CSS selectors (e.g. 'h3.title a', 'h1');
--   `absoluteUrl`          the string 'TRUE' or 'FALSE' (stored as text).
-- The primary key and the AUTO_INCREMENT attribute on `id` are added by the
-- ALTER TABLE statements further down in this dump.
CREATE TABLE `sites` (
  `id` int(11) NOT NULL,
  `name` varchar(255) NOT NULL,
  `url` varchar(511) NOT NULL,
  `searchUrl` varchar(511) NOT NULL,
  `resultListing` varchar(127) NOT NULL,
  `resultUrl` varchar(127) NOT NULL,
  `absoluteUrl` varchar(127) NOT NULL,
  `pageTitle` varchar(127) NOT NULL,
  `pageBody` varchar(127) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=1 ;

-- --------------------------------------------------------

--
-- Table structure for table `topics`
--

-- Topic names. The seed rows at the end of this dump store them in
-- URL-encoded form (e.g. 'star%20wars').
-- NOTE(review): presumably the name is appended to `sites`.`searchUrl`; confirm.
-- The primary key and the AUTO_INCREMENT attribute on `id` are added by the
-- ALTER TABLE statements further down in this dump.
CREATE TABLE `topics` (
  `id` int(11) NOT NULL,
  `name` varchar(100) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=15 ;

--
-- Indexes for dumped tables
--

--
-- Indexes for table `content`
--
-- Declare `id` as the primary key of `content`.
ALTER TABLE `content`
  ADD PRIMARY KEY (`id`);

--
-- Indexes for table `sites`
--
-- Declare `id` as the primary key of `sites`.
-- The table name is written in lower case to match CREATE TABLE `sites`:
-- on servers with case-sensitive table names, `Sites` would not resolve.
ALTER TABLE `sites`
  ADD PRIMARY KEY (`id`);

--
-- Indexes for table `topics`
--
-- Declare `id` as the primary key of `topics`.
ALTER TABLE `topics`
  ADD PRIMARY KEY (`id`);

--
-- AUTO_INCREMENT for dumped tables
--

--
-- AUTO_INCREMENT for table `content`
--
-- Make `content`.`id` an AUTO_INCREMENT column and set the table's
-- AUTO_INCREMENT counter to 28.
ALTER TABLE `content`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=28;
--
-- AUTO_INCREMENT for table `sites`
--
-- Make `sites`.`id` an AUTO_INCREMENT column.
-- The table name is written in lower case to match CREATE TABLE `sites`:
-- on servers with case-sensitive table names, `Sites` would not resolve.
ALTER TABLE `sites`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `topics`
--
-- Make `topics`.`id` an AUTO_INCREMENT column and set the table's
-- AUTO_INCREMENT counter to 15.
ALTER TABLE `topics`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=15;

--
-- Seed data for table `sites`
--
-- One row per site: name, root URL, search URL prefix, four CSS selectors and
-- the `absoluteUrl` flag, which is stored as the text 'TRUE' / 'FALSE'.
-- Identifiers are backtick-quoted and strings single-quoted, matching the
-- `topics` insert in this dump, so the statement also parses under ANSI_QUOTES.
-- The inserted values are unchanged.
INSERT INTO `sites` (`name`, `url`, `searchUrl`, `resultListing`, `resultUrl`, `absoluteUrl`, `pageTitle`, `pageBody`) VALUES
('Brookings', 'http://www.brookings.edu', 'http://www.brookings.edu/search?start=1&q=', 'ul.search-results li', 'h3.title a', 'FALSE', 'h1', 'div[itemprop="articleBody"]'),
('Reuters', 'http://reuters.com', 'http://www.reuters.com/search/news?blob=', 'div.search-result-content', 'h3.search-result-title a', 'TRUE', 'h1', 'span#articleText');

--
-- Seed data for table `topics`
--
-- Names are stored exactly as given here, including the URL-encoded space
-- (%20) in 'star%20wars'.
INSERT INTO `topics` (`name`) VALUES
('data'),
('star%20wars');