Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
amanchadha
GitHub Repository: amanchadha/coursera-natural-language-processing-specialization
Path: blob/master/4 - Natural Language Processing with Attention Models/Week 3/data/squad/plain_text/1.0.0/dataset_info.json
65 views
1
{
2
"citation": "@article{2016arXiv160605250R,\n author = {{Rajpurkar}, Pranav and {Zhang}, Jian and {Lopyrev},\n Konstantin and {Liang}, Percy},\n title = \"{SQuAD: 100,000+ Questions for Machine Comprehension of Text}\",\n journal = {arXiv e-prints},\n year = 2016,\n eid = {arXiv:1606.05250},\n pages = {arXiv:1606.05250},\narchivePrefix = {arXiv},\n eprint = {1606.05250},\n}",
3
"description": "Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.",
4
"downloadSize": "35142551",
5
"location": {
6
"urls": [
7
"https://rajpurkar.github.io/SQuAD-explorer/"
8
]
9
},
10
"name": "squad",
11
"schema": {
12
"feature": [
13
{
14
"name": "answers"
15
},
16
{
17
"name": "context",
18
"type": "BYTES"
19
},
20
{
21
"name": "id",
22
"type": "BYTES"
23
},
24
{
25
"name": "question",
26
"type": "BYTES"
27
},
28
{
29
"name": "title",
30
"type": "BYTES"
31
}
32
]
33
},
34
"sizeInBytes": "35142551",
35
"splits": [
36
{
37
"name": "train",
38
"numShards": "10",
39
"shardLengths": [
40
"87599"
41
],
42
"statistics": {
43
"features": [
44
{
45
"bytesStats": {
46
"commonStats": {
47
"numNonMissing": "87599"
48
}
49
},
50
"name": "context",
51
"type": "BYTES"
52
},
53
{
54
"bytesStats": {
55
"commonStats": {
56
"numNonMissing": "87599"
57
}
58
},
59
"name": "id",
60
"type": "BYTES"
61
},
62
{
63
"bytesStats": {
64
"commonStats": {
65
"numNonMissing": "87599"
66
}
67
},
68
"name": "question",
69
"type": "BYTES"
70
},
71
{
72
"bytesStats": {
73
"commonStats": {
74
"numNonMissing": "87599"
75
}
76
},
77
"name": "title",
78
"type": "BYTES"
79
}
80
],
81
"numExamples": "87599"
82
}
83
},
84
{
85
"name": "validation",
86
"numShards": "1",
87
"shardLengths": [
88
"10570"
89
],
90
"statistics": {
91
"features": [
92
{
93
"bytesStats": {
94
"commonStats": {
95
"numNonMissing": "10570"
96
}
97
},
98
"name": "context",
99
"type": "BYTES"
100
},
101
{
102
"bytesStats": {
103
"commonStats": {
104
"numNonMissing": "10570"
105
}
106
},
107
"name": "id",
108
"type": "BYTES"
109
},
110
{
111
"bytesStats": {
112
"commonStats": {
113
"numNonMissing": "10570"
114
}
115
},
116
"name": "question",
117
"type": "BYTES"
118
},
119
{
120
"bytesStats": {
121
"commonStats": {
122
"numNonMissing": "10570"
123
}
124
},
125
"name": "title",
126
"type": "BYTES"
127
}
128
],
129
"numExamples": "10570"
130
}
131
}
132
],
133
"version": "1.0.0"
134
}
135