Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
Download
6320 views
default
Kernel: Python 3 (system-wide)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Read the chocolate-consumption CSV. Column names are passed explicitly,
# so pandas treats every row of the file as data.
with open("Chocolate-Consumption.csv", 'r') as csv_file:
    consumption = pd.read_csv(csv_file, names=['Country', 'Consumption'])

# First three rows of the table.
consumption.iloc[:3]

# Last three rows of the table.
consumption.iloc[-3:]

# Rows ordered by country name (returns a new frame; `consumption` itself
# is not modified).
consumption.sort_values('Country')

# Bar chart with the country on the x axis and its consumption as the bar.
consumption.plot.bar(x="Country", y="Consumption")
plt.show()
Image in a Jupyter notebook
# Read the raw Nobel-laureate table as a list of lines (each line keeps its
# trailing newline). The file contains non-ASCII characters (the em dash used
# in the Rank column of unranked entities), so the encoding is stated
# explicitly instead of relying on the platform default.
with open("nobels.txt", "r", encoding="utf-8") as file:
    prizes_list = file.readlines()
prizes_list
['Rank\tEntity\tNobel\n', 'laureates[1]\tPopulation\n', '(2018)[2]\tLaureates/\n', '10 million\n', '—\t Faroe Islands\t1\t49,489\t202.065\n', '1\t Saint Lucia\t2\t179,667\t111.317\n', '2\t Luxembourg\t2\t590,321\t33.880\n', '3\t Switzerland\t28\t8,544,034\t32.771\n', '4\t Sweden\t30\t9,982,709\t30.052\n', '5\t Iceland\t1\t337,780\t29.605\n', '6\t Austria\t22\t8,751,820\t25.138\n', '7\t Denmark\t14\t5,754,356\t24.329\n', '8\t Norway\t13\t5,353,363\t24.284\n', '9\t United Kingdom\t133\t66,573,504\t19.429\n', '10\t East Timor\t2\t1,324,094\t15.105\n', '11\t Ireland\t7\t4,803,748\t14.572\n', '12\t Israel\t12\t8,452,841\t14.196\n', '13\t Hungary\t13\t9,688,847\t13.417\n', '13\t Germany\t108\t82,293,457\t13.245\n', '15\t United States\t383\t326,766,748\t11.721\n', '16\t Netherlands\t21\t17,084,459\t11.707\n', '17\t France\t70\t65,233,271\t10.664\n', '—\t European Union[3]\t378\t444,697,104\t8.005\n', '18\t Finland\t5\t5,542,517\t9.021\n', '19\t Belgium\t10\t11,498,519\t8.697\n', '20\t Cyprus\t1\t1,189,085\t8.410\n', '21\t Trinidad and Tobago\t1\t1,372,598\t7.285\n', '22\t Canada\t25\t36,953,765\t6.765\n', '23\t New Zealand\t3\t4,749,598\t6.316\n', '24\t Bosnia and Herzegovina\t2\t3,503,554\t5.708\n', '25\t Latvia\t1\t1,929,938\t5.182\n', '26\t Poland\t19\t38,104,832\t4.986\n', '27\t Australia\t12\t24,772,247\t4.844\n', '28\t Slovenia\t1\t2,081,260\t4.805\n', '29\t North Macedonia\t1\t2,085,051\t4.796\n', '30\t Czech Republic\t5\t10,625,250\t4.706\n', '31\t Liberia\t2\t4,853,516\t4.121\n', '32\t Lithuania\t1\t2,876,475\t3.476\n', '33\t Italy\t20\t59,290,969\t3.373\n', '—\t Tibet[4]\t1\t3,310,836\t3.020\n', '34\t Croatia\t1\t4,164,783\t2.401\n', '35\t Japan\t28\t127,185,332\t2.202\n', '36\t Belarus\t2\t9,452,113\t2.116\n', '37\t Romania\t4\t19,580,634\t2.043\n', '38\t Costa Rica\t1\t4,953,199\t2.019\n', '39\t Palestine\t1\t5,052,776\t1.979\n', '40\t Portugal\t2\t10,291,196\t1.943\n', '41\t Greece\t2\t11,142,161\t1.795\n', '42\t South Africa\t10\t57,398,421\t1.742\n', '43\t 
Spain\t8\t46,397,452\t1.724\n', '44\t Russia\t23\t143,964,709\t1.598\n', '45\t Bulgaria\t1\t7,036,848\t1.421\n', '—\t Hong Kong\t1\t7,428,887\t1.346\n', '—\t World[5]\t919\t7,632,819,325\t1.204\n', '46\t Guatemala\t2\t17,245,346\t1.160\n', '47\t Argentina\t5\t44,688,864\t1.119\n', '48\t Chile\t2\t18,197,209\t1.099\n', '49\t Azerbaijan\t1\t9,923,914\t1.008\n', '50\t Algeria\t2\t42,008,054\t0.476\n', '51\t Ukraine\t2\t44,009,214\t0.454\n', '52\t Taiwan\t1\t23,694,089\t0.422\n', '53\t Colombia\t2\t49,464,683\t0.404\n', '54\t Egypt\t4\t99,375,741\t0.403\n', '55\t South Korea\t2\t51,164,435\t0.391\n', '56\t Yemen\t1\t28,915,284\t0.346\n', '57\t Ghana\t1\t29,463,643\t0.339\n', '58\t Venezuela\t1\t32,381,221\t0.309\n', '59\t Peru\t1\t32,551,815\t0.307\n', '60\t Morocco\t1\t36,191,805\t0.276\n', '61\t Iraq\t1\t39,339,753\t0.254\n', '62\t Turkey\t2\t81,916,871\t0.244\n', '63\t Iran\t2\t82,011,735\t0.244\n', '64\t Mexico\t3\t130,759,074\t0.229\n', '65\t Kenya\t1\t50,950,879\t0.196\n', '66\t Myanmar\t1\t53,855,735\t0.186\n', '67\t DR Congo\t1\t84,004,989\t0.119\n', '68\t Vietnam\t1\t96,491,146\t0.104\n', '69\t Pakistan\t2\t200,813,818\t0.100\n', '70\t Ethiopia\t1\t109,224,410[6]\t0.092\n', '71\t India\t11\t1,354,051,854\t0.081\n', '72\t China\t9\t1,415,045,928\t0.064\n', '73\t Bangladesh\t1\t166,368,149\t0.060\n', '74\t Nigeria\t1\t195,875,237\t0.051']
# Unpack the list so every line is passed to print as its own argument;
# print puts a single space between consecutive arguments.
print(*prizes_list)
Rank Entity Nobel laureates[1] Population (2018)[2] Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 — European Union[3] 378 444,697,104 8.005 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet[4] 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 — World[5] 919 7,632,819,325 1.204 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 
28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410[6] 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# The header is spread over the first four list entries. Remove the trailing
# whitespace (including the newline) from the first three of them; the fourth
# entry is left as it is.
prizes_list[:3] = [fragment.rstrip() for fragment in prizes_list[:3]]
prizes_list[:4]
['Rank\tEntity\tNobel', 'laureates[1]\tPopulation', '(2018)[2]\tLaureates/', '10 million\n']
# Glue the four header fragments into one line, with a space where each
# newline used to be, and put that line in front of the data lines.
prizes2 = [' '.join(prizes_list[:4]), *prizes_list[4:]]
print(*prizes2)
Rank Entity Nobel laureates[1] Population (2018)[2] Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 — European Union[3] 378 444,697,104 8.005 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet[4] 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 — World[5] 919 7,632,819,325 1.204 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 
28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410[6] 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# Use a regular expression to remove bracketed reference numbers such as
# "[1]" from every line.
import re

# The pattern is a raw string: in an ordinary string literal "\[" and "\d"
# are invalid escape sequences, which newer Python versions warn about.
prizes3 = [re.sub(r'\[\d+\]', '', line) for line in prizes2]
print(*prizes3)
Rank Entity Nobel laureates Population (2018) Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 — European Union 378 444,697,104 8.005 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 — World 919 7,632,819,325 1.204 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 28,915,284 0.346 57 
Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# First way to drop unwanted rows: keep only the lines that contain neither
# the text 'World' nor the text 'Europe'.
prizes4 = [
    line
    for line in prizes3
    if 'World' not in line and 'Europe' not in line
]
print(*prizes4)
Rank Entity Nobel laureates Population (2018) Laureates/ 10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 
32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# Second way to drop the same rows: keep a line only when the regular
# expression finds neither alternative in it.
prizes4 = [
    line
    for line in prizes3
    if re.search('(World)|(Europe)', line) is None
]

# Close the gap after the slash in the header ("Laureates/ 10 million").
prizes4[0] = re.sub('/ ', '/', prizes4[0])
print(*prizes4)
Rank Entity Nobel laureates Population (2018) Laureates/10 million — Faroe Islands 1 49,489 202.065 1 Saint Lucia 2 179,667 111.317 2 Luxembourg 2 590,321 33.880 3 Switzerland 28 8,544,034 32.771 4 Sweden 30 9,982,709 30.052 5 Iceland 1 337,780 29.605 6 Austria 22 8,751,820 25.138 7 Denmark 14 5,754,356 24.329 8 Norway 13 5,353,363 24.284 9 United Kingdom 133 66,573,504 19.429 10 East Timor 2 1,324,094 15.105 11 Ireland 7 4,803,748 14.572 12 Israel 12 8,452,841 14.196 13 Hungary 13 9,688,847 13.417 13 Germany 108 82,293,457 13.245 15 United States 383 326,766,748 11.721 16 Netherlands 21 17,084,459 11.707 17 France 70 65,233,271 10.664 18 Finland 5 5,542,517 9.021 19 Belgium 10 11,498,519 8.697 20 Cyprus 1 1,189,085 8.410 21 Trinidad and Tobago 1 1,372,598 7.285 22 Canada 25 36,953,765 6.765 23 New Zealand 3 4,749,598 6.316 24 Bosnia and Herzegovina 2 3,503,554 5.708 25 Latvia 1 1,929,938 5.182 26 Poland 19 38,104,832 4.986 27 Australia 12 24,772,247 4.844 28 Slovenia 1 2,081,260 4.805 29 North Macedonia 1 2,085,051 4.796 30 Czech Republic 5 10,625,250 4.706 31 Liberia 2 4,853,516 4.121 32 Lithuania 1 2,876,475 3.476 33 Italy 20 59,290,969 3.373 — Tibet 1 3,310,836 3.020 34 Croatia 1 4,164,783 2.401 35 Japan 28 127,185,332 2.202 36 Belarus 2 9,452,113 2.116 37 Romania 4 19,580,634 2.043 38 Costa Rica 1 4,953,199 2.019 39 Palestine 1 5,052,776 1.979 40 Portugal 2 10,291,196 1.943 41 Greece 2 11,142,161 1.795 42 South Africa 10 57,398,421 1.742 43 Spain 8 46,397,452 1.724 44 Russia 23 143,964,709 1.598 45 Bulgaria 1 7,036,848 1.421 — Hong Kong 1 7,428,887 1.346 46 Guatemala 2 17,245,346 1.160 47 Argentina 5 44,688,864 1.119 48 Chile 2 18,197,209 1.099 49 Azerbaijan 1 9,923,914 1.008 50 Algeria 2 42,008,054 0.476 51 Ukraine 2 44,009,214 0.454 52 Taiwan 1 23,694,089 0.422 53 Colombia 2 49,464,683 0.404 54 Egypt 4 99,375,741 0.403 55 South Korea 2 51,164,435 0.391 56 Yemen 1 28,915,284 0.346 57 Ghana 1 29,463,643 0.339 58 Venezuela 1 32,381,221 0.309 59 Peru 1 
32,551,815 0.307 60 Morocco 1 36,191,805 0.276 61 Iraq 1 39,339,753 0.254 62 Turkey 2 81,916,871 0.244 63 Iran 2 82,011,735 0.244 64 Mexico 3 130,759,074 0.229 65 Kenya 1 50,950,879 0.196 66 Myanmar 1 53,855,735 0.186 67 DR Congo 1 84,004,989 0.119 68 Vietnam 1 96,491,146 0.104 69 Pakistan 2 200,813,818 0.100 70 Ethiopia 1 109,224,410 0.092 71 India 11 1,354,051,854 0.081 72 China 9 1,415,045,928 0.064 73 Bangladesh 1 166,368,149 0.060 74 Nigeria 1 195,875,237 0.051
# Build the DataFrame: the first line supplies the column headers and every
# remaining line supplies one row. Lines are split on tabs only, so whatever
# follows the last tab (including a trailing newline) stays in the last field.
prizes = pd.DataFrame(
    [fields.split('\t') for fields in prizes4[1:]],
    columns=prizes4[0].split("\t"),
)

# head() shows the first 5 rows by default.
prizes.head()

# Positional lookup ("index, locate"): row 4, column 1.
prizes.iat[4, 1]
' Sweden'
# Label-based lookup ("locate"): row label 4, column named "Entity".
prizes.loc[4, "Entity"]
' Sweden'
# A bare ":" as the row selector returns every row of the "Entity" column.
prizes.loc[:, "Entity"]
0 Faroe Islands 1 Saint Lucia 2 Luxembourg 3 Switzerland 4 Sweden ... 72 Ethiopia 73 India 74 China 75 Bangladesh 76 Nigeria Name: Entity, Length: 77, dtype: object
# Plain bracket indexing with the column name also returns the whole
# "Entity" column.
prizes['Entity']
0 Faroe Islands 1 Saint Lucia 2 Luxembourg 3 Switzerland 4 Sweden ... 72 Ethiopia 73 India 74 China 75 Bangladesh 76 Nigeria Name: Entity, Length: 77, dtype: object
# Same construction as before, but from prizes3, i.e. the lines from before
# the 'World' / 'Europe' rows were filtered out.
prizes3a = pd.DataFrame(
    [fields.split('\t') for fields in prizes3[1:]],
    columns=prizes3[0].split("\t"),
)
prizes3a.iloc[16:20]

# Boolean Series that is True where the entity is exactly " European Union"
# (the leading space is part of the compared value).
euro_mask = prizes3a["Entity"].eq(" European Union")
euro_mask.iloc[16:20]
16 False 17 False 18 True 19 False Name: Entity, dtype: bool
# Rows where the mask is True.
prizes3b = prizes3a.loc[euro_mask]
prizes3b

# Rows where the mask is False (the mask inverted element-wise).
prizes3c = prizes3a.loc[~euro_mask]
prizes3c.iloc[16:20]

# Look at one entity name from `prizes` before cleaning it.
prizes.loc[4, "Entity"]
' Sweden'
# Clean one cell: read it, strip the surrounding whitespace, write it back.
prizes.at[4, "Entity"] = prizes.at[4, "Entity"].strip()
prizes.at[4, "Entity"]
'Sweden'
# Clean the whole series at once: call str.strip on every value of the
# "Entity" column to remove the extra whitespace.
prizes["Entity"] = prizes["Entity"].map(str.strip)
prizes.loc[6, "Entity"]
'Austria'
# Do the same for the last column. Its name still ends with a newline at
# this point, so the newline has to be part of the key.
prizes["Laureates/10 million\n"] = prizes["Laureates/10 million\n"].map(str.strip)
prizes.loc[5, "Laureates/10 million\n"]
'29.605'
# Preview the first rows of the frame.
prizes.head()
# Show the name of the column at position 4.
prizes.columns[4]
'Laureates/10 million\n'
# Rename the column at position 4 to the same name with surrounding
# whitespace stripped; the frame is modified in place.
last_header = prizes.columns[4]
prizes.rename(columns={last_header: last_header.strip()}, inplace=True)
prizes.columns[4]
'Laureates/10 million'
# Preview the first rows of the frame.
prizes.head()
# Show the dtype pandas holds for each column.
prizes.dtypes
Rank object Entity object Nobel laureates object Population (2018) object Laureates/10 million object dtype: object
# Work on a separate frame so `prizes` keeps its string columns: assign()
# returns a new DataFrame in which the per-capita column has been parsed
# into numbers.
prizesnums = prizes.assign(
    **{"Laureates/10 million": pd.to_numeric(prizes["Laureates/10 million"])}
)
prizesnums.dtypes
Rank object Entity object Nobel laureates object Population (2018) object Laureates/10 million float64 dtype: object
# Remove the commas from the population strings in `prizes`
# (plain text replacement, no regular expression).
prizes["Population (2018)"] = prizes["Population (2018)"].str.replace(',', '', regex=False)
prizes["Population (2018)"]
0 49489 1 179667 2 590321 3 8544034 4 9982709 ... 72 109224410 73 1354051854 74 1415045928 75 166368149 76 195875237 Name: Population (2018), Length: 77, dtype: object
# `prizesnums` was copied from `prizes` before the commas were removed from
# the population column, so its values still look like "49,489" and
# pd.to_numeric cannot parse them. Remove the commas from this frame's own
# column first, then convert.
population_text = prizesnums["Population (2018)"].astype(str).str.replace(",", "", regex=False)
prizesnums["Population (2018)"] = pd.to_numeric(population_text)

prizesnums.dtypes
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric() ValueError: Unable to parse string "49,489" During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) <ipython-input-34-d44213833c91> in <module> ----> 1 prizesnums["Population (2018)"] = pd.to_numeric(prizesnums["Population (2018)"]) 2 3 prizesnums.dtypes /usr/local/lib/python3.8/dist-packages/pandas/core/tools/numeric.py in to_numeric(arg, errors, downcast) 150 coerce_numeric = errors not in ("ignore", "raise") 151 try: --> 152 values = lib.maybe_convert_numeric( 153 values, set(), coerce_numeric=coerce_numeric 154 ) pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric() ValueError: Unable to parse string "49,489" at position 0
# Convert all three numeric columns in one call; astype returns a new frame
# and columns not listed in the mapping keep their current dtype.
numeric_dtypes = {
    "Nobel laureates": np.int64,
    "Population (2018)": np.int64,
    "Laureates/10 million": np.float64,
}
prizesnums = prizes.astype(numeric_dtypes)
prizesnums.dtypes
Rank object Entity object Nobel laureates int64 Population (2018) int64 Laureates/10 million float64 dtype: object
# With no columns given, the bar plot tries to draw every numerical column.
prizesnums.plot(kind="bar")
plt.show()
Image in a Jupyter notebook
# One bar per entity, bar height = number of laureates.
prizesnums.plot.bar(x="Entity", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Reorder the frame in place, largest laureate count first, then redraw the
# same bar chart.
prizesnums.sort_values(by="Nobel laureates", ascending=False, inplace=True)
prizesnums.plot.bar(x="Entity", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Mask out the smaller entries: plot only the rows with more than two
# laureates.
prizesnums.loc[prizesnums["Nobel laureates"].gt(2)].plot.bar(
    x="Entity", y="Nobel laureates"
)
plt.show()
Image in a Jupyter notebook
# Sort in place by the per-capita value (the orange series in the chart),
# largest first.
prizesnums.sort_values(by="Laureates/10 million", ascending=False, inplace=True)

# Compare two columns side by side for the rows whose per-capita value
# exceeds 5.
prizesnums.loc[prizesnums["Laureates/10 million"].gt(5)].plot.bar(
    x="Entity",
    y=["Nobel laureates", "Laureates/10 million"],
)
plt.show()
Image in a Jupyter notebook
# Scatter plot of laureate count against population for every row.
prizesnums.plot.scatter(x="Population (2018)", y="Nobel laureates")
plt.show()
Image in a Jupyter notebook
# Same scatter plot, restricted to rows with a population below 300 million.
prizesnums.loc[prizesnums["Population (2018)"].lt(300_000_000)].plot.scatter(
    x="Population (2018)", y="Nobel laureates"
)
plt.show()
Image in a Jupyter notebook
# Join the two tables where consumption's "Country" equals prizesnums'
# "Entity". validate="1:1" makes pandas raise if a key is duplicated on
# either side.
merged = pd.merge(
    consumption,
    prizesnums,
    left_on="Country",
    right_on="Entity",
    validate="1:1",
)
merged

# Compare the sizes of the two inputs and of the merged result.
for frame in (prizesnums, consumption, merged):
    print(frame.shape)
(77, 5) (14, 2) (12, 7)
# Outer join: keep rows from both tables even when there is no partner on
# the other side, with the result sorted by the join keys.
merged = pd.merge(
    consumption,
    prizesnums,
    how="outer",
    left_on="Country",
    right_on="Entity",
    sort=True,
    validate="1:1",
)
merged

# Look at both ends of the merged table.
merged.head(40)

merged.tail(40)

# Replace the value "UK" with "United Kingdom" everywhere in `consumption`,
# in place.
consumption.replace("UK", "United Kingdom", inplace=True)
consumption
# Add a row for Estonia with zero laureates (presumably so that it has a
# partner in the consumption table when the two are merged).
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with a one-row frame is the supported replacement. Columns that the new row
# does not provide are filled with NaN.
estonia_row = pd.DataFrame(
    [{"Entity": "Estonia", "Nobel laureates": 0, "Laureates/10 million": 0}]
)
prizesnums = pd.concat([prizesnums, estonia_row], ignore_index=True)
prizesnums

# Check the column dtypes after adding the row.
prizesnums.dtypes
Rank object Entity object Nobel laureates int64 Population (2018) float64 Laureates/10 million float64 dtype: object
# Redo the one-to-one join of consumption and prizesnums on
# "Country" / "Entity".
merged = pd.merge(
    consumption,
    prizesnums,
    left_on="Country",
    right_on="Entity",
    validate="1:1",
)
merged

# Scatter plot of per-capita laureates against chocolate consumption.
merged.plot.scatter(x="Consumption", y="Laureates/10 million", linewidth=5)
plt.show()
Image in a Jupyter notebook
# Pairwise correlation of the numeric columns. The frame also holds text
# columns (e.g. "Country", "Entity"), which DataFrame.corr() rejects in
# pandas >= 2.0, so the numeric columns are selected explicitly first.
merged.select_dtypes(include="number").corr()

# Repeat the scatter plot and the correlation without the Estonia row.
wo_estonia = merged[merged["Entity"] != "Estonia"]
wo_estonia.plot.scatter("Consumption", "Laureates/10 million", linewidth=5)
plt.show()

wo_estonia.select_dtypes(include="number").corr()
Image in a Jupyter notebook
from scipy.stats import linregress

# Fit a straight line to per-capita laureates as a function of chocolate
# consumption. Besides slope and intercept, linregress also returns the
# correlation coefficient r and the p value.
(slope, intercept, r, p, _) = linregress(merged["Consumption"], merged["Laureates/10 million"])

plt.figure(figsize=(10, 8))
plt.scatter(merged["Consumption"], merged["Laureates/10 million"], linewidth=5)
plt.plot(
    merged["Consumption"],
    slope * merged["Consumption"] + intercept,
    "k",
    linewidth=3,
    label="Linear Model",
)
plt.xlabel("Chocolate Consumption (kg/year/person)")
# The plotted column is laureates per 10 million inhabitants, so the axis
# label states that unit.
plt.ylabel("Nobel Laureates per 10 million people")
plt.legend()
plt.xlim([0, 15])
plt.show()

print("r = ", round(r, 3))
# Report the comparison against the 0.05 threshold from the computed p value
# rather than hard-coding "<".
comparison = "<" if p < 0.05 else ">="
print("p", comparison, "0.05 (", round(p, 3), ")")
Image in a Jupyter notebook
r = 0.534 p < 0.05 ( 0.049 )
# Display the slope value returned by linregress.
slope
1.9518222295023373