CoCalc -- dataset

GitHub Repository: amanchadha/coursera-natural-language-processing-specialization
Path: blob/master/4 - Natural Language Processing with Attention Models/Week 3/data/squad/plain_text/1.0.0/dataset_info.json
⁶⁵ views
1
{
2
  "citation": "@article{2016arXiv160605250R,\n       author = {{Rajpurkar}, Pranav and {Zhang}, Jian and {Lopyrev},\n                 Konstantin and {Liang}, Percy},\n        title = \"{SQuAD: 100,000+ Questions for Machine Comprehension of Text}\",\n      journal = {arXiv e-prints},\n         year = 2016,\n          eid = {arXiv:1606.05250},\n        pages = {arXiv:1606.05250},\narchivePrefix = {arXiv},\n       eprint = {1606.05250},\n}",
3
  "description": "Stanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.",
4
  "downloadSize": "35142551",
5
  "location": {
6
    "urls": [
7
      "https://rajpurkar.github.io/SQuAD-explorer/"
8
    ]
9
  },
10
  "name": "squad",
11
  "schema": {
12
    "feature": [
13
      {
14
        "name": "answers"
15
      },
16
      {
17
        "name": "context",
18
        "type": "BYTES"
19
      },
20
      {
21
        "name": "id",
22
        "type": "BYTES"
23
      },
24
      {
25
        "name": "question",
26
        "type": "BYTES"
27
      },
28
      {
29
        "name": "title",
30
        "type": "BYTES"
31
      }
32
    ]
33
  },
34
  "sizeInBytes": "35142551",
35
  "splits": [
36
    {
37
      "name": "train",
38
      "numShards": "10",
39
      "shardLengths": [
40
        "87599"
41
      ],
42
      "statistics": {
43
        "features": [
44
          {
45
            "bytesStats": {
46
              "commonStats": {
47
                "numNonMissing": "87599"
48
              }
49
            },
50
            "name": "context",
51
            "type": "BYTES"
52
          },
53
          {
54
            "bytesStats": {
55
              "commonStats": {
56
                "numNonMissing": "87599"
57
              }
58
            },
59
            "name": "id",
60
            "type": "BYTES"
61
          },
62
          {
63
            "bytesStats": {
64
              "commonStats": {
65
                "numNonMissing": "87599"
66
              }
67
            },
68
            "name": "question",
69
            "type": "BYTES"
70
          },
71
          {
72
            "bytesStats": {
73
              "commonStats": {
74
                "numNonMissing": "87599"
75
              }
76
            },
77
            "name": "title",
78
            "type": "BYTES"
79
          }
80
        ],
81
        "numExamples": "87599"
82
      }
83
    },
84
    {
85
      "name": "validation",
86
      "numShards": "1",
87
      "shardLengths": [
88
        "10570"
89
      ],
90
      "statistics": {
91
        "features": [
92
          {
93
            "bytesStats": {
94
              "commonStats": {
95
                "numNonMissing": "10570"
96
              }
97
            },
98
            "name": "context",
99
            "type": "BYTES"
100
          },
101
          {
102
            "bytesStats": {
103
              "commonStats": {
104
                "numNonMissing": "10570"
105
              }
106
            },
107
            "name": "id",
108
            "type": "BYTES"
109
          },
110
          {
111
            "bytesStats": {
112
              "commonStats": {
113
                "numNonMissing": "10570"
114
              }
115
            },
116
            "name": "question",
117
            "type": "BYTES"
118
          },
119
          {
120
            "bytesStats": {
121
              "commonStats": {
122
                "numNonMissing": "10570"
123
              }
124
            },
125
            "name": "title",
126
            "type": "BYTES"
127
          }
128
        ],
129
        "numExamples": "10570"
130
      }
131
    }
132
  ],
133
  "version": "1.0.0"
134
}
135
Product

Resources

Company