{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "This notebook is primarily taken from <a href=\"https://realpython.com/pandas-settingwithcopywarning/\" target=\"_blank\">SettingWithCopyWarning in Pandas: Views vs Copies, by Mirko Stojiljković</a>."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
    "import pandas as pd\n",
    "from pandas import DataFrame\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>8</td>\n",
       "      <td>27</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>16</td>\n",
       "      <td>81</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    x   y   z\n",
       "a   1   1  45\n",
       "b   2   3  98\n",
       "c   4   9  24\n",
       "d   8  27  11\n",
       "e  16  81  64"
      ]
     },
     "execution_count": 36,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = {\"x\": 2**np.arange(5),\n",
    "        \"y\": 3**np.arange(5),\n",
    "        \"z\": np.array([45, 98, 24, 11, 64])\n",
    "}\n",
    "\n",
    "index = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n",
    "\n",
    "df = DataFrame(data = data, index=index)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Create a boolean array named `mask` that has a True value for all rows in `df` for which the value in column 'z' is less than 50."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a     True\n",
       "b    False\n",
       "c     True\n",
       "d     True\n",
       "e    False\n",
       "Name: z, dtype: bool"
      ]
     },
     "execution_count": 37,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mask = df['z'] < 50\n",
    "mask"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "And look at the rows in `df` for which `mask` contains a value of **True**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>8</td>\n",
       "      <td>27</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   x   y   z\n",
       "a  1   1  45\n",
       "c  4   9  24\n",
       "d  8  27  11"
      ]
     },
     "execution_count": 38,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[mask]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Now we want to set all the values in column `z` of `df` for those rows where `mask` contains True."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_2151/2435874596.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df[mask]['z'] = 0\n"
     ]
    }
   ],
   "source": [
    "df[mask]['z'] = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Here's an illustration that shows what's happening.\n",
    "\n",
    "<img src=\"https://files.realpython.com/media/fig-02.494ca4d6f8c3.png\" height=\"300px\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Why does the application of `mask` return a **copy** of `df` rather than a `slice`?  NumPy, and therefore Pandas, wants to keep all the elements of a `Series` in a contiguous block of memory.  From **High-performance embedded computing** by Cardoso, Coutinho, and Diniz:\n",
    "\n",
    "<blockquote cite=\"https://www.sciencedirect.com/topics/computer-science/single-instruction-multiple-data\">\n",
    "    To exploit SIMD units, it is very important to be able to combine multiple load or store accesses in a single SIMD instruction. This can be achieved when using <strong>contiguous memory accesses</strong>, e.g., in the presence of unit stride accesses, and when array elements are aligned.\n",
    "</blockquote>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Perhaps surprisingly, you **don't** get the warning message if you execute the following cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>8</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>16</td>\n",
       "      <td>81</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    x   y   z\n",
       "a   1   1   0\n",
       "b   2   3  98\n",
       "c   4   9   0\n",
       "d   8  27   0\n",
       "e  16  81  64"
      ]
     },
     "execution_count": 41,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"z\"][mask] = 0\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "The picture below illustrates why\n",
    "<img src=\"https://files.realpython.com/media/fig-03.8a314d168d47.png\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Pandas knows that `df['z']` is **not a copy** of the data in `df`, and implements the assignment statement directly on that portion of memory."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "##  Implementation of the \\[\\] operator in Python\n",
    "\n",
    "When creating an object that supports indexing via the `[ ]` notation, Python requires classes like `DataFrame` and `Series` to implement two **methods**:  `__getitem__` and `__setitem__` (creatively referred to as **dunder** methods).\n",
    "\n",
    "So this means that statements like\n",
    "```\n",
    "df[mask][\"z\"] = 0\n",
    "```\n",
    "and \n",
    "```\n",
    "df[\"z\"][mask] = 0\n",
    "```\n",
    "\n",
    "involve 3 method calls:\n",
    "\n",
    "* 2 calls to `__getitem__`\n",
    "* 1 call to `__setitem__`\n",
    "\n",
    "These methods can make use of an **internal** `DataFrame` / `Series` method named `_is_view` in their implementations.  If something is a **view**, it's not a **copy**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df is copy: True\n",
      "df[mask] is copy: True\n",
      "df['z'] is copy: False\n"
     ]
    }
   ],
   "source": [
    "print(f\"df is copy: {not df._is_view}\")\n",
    "print(f\"df[mask] is copy: {not df[mask]._is_view}\")\n",
    "print(f\"df['z'] is copy: {not df['z']._is_view}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "##  Using accessor methods for assignment\n",
    "\n",
    "Stojiljković advises that you:\n",
    "\n",
    "* **Avoid chained assignments** that combine 2 or more indexing operations like `df[mask]['z'] = 0` and `df.loc[mask][\"z\"] = 0`\n",
    "* **Apply single assignments** with just one indexing operation, like `df.loc[mask, \"z\"] = 0`\n",
    "\n",
    "Other methods that are generally safer include `iloc`, `at`, and `iat`.  `at/iat` allow you to access a **single** element of a `DataFrame` / `Series` by providing a row/column combination."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>8</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>16</td>\n",
       "      <td>81</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    x   y   z\n",
       "a   1   1   0\n",
       "b   2   3  98\n",
       "c   4   9   0\n",
       "d   8  27   0\n",
       "e  16  81  64"
      ]
     },
     "execution_count": 34,
     "metadata": {
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[mask, \"z\"] = 0\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "You can control whether Pandas issues the `SettingWithCopyWarning` by calling `pd.set_option` like this:\n",
    "\n",
    "* `pd.set_option(\"mode.chained_assignment\", \"raise\") `\n",
    "* `pd.set_option(\"mode.chained_assignment\", \"warn\") ` **default**\n",
    "* `pd.set_option(\"mode.chained_assignment\", None) `"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "For more information , <a href=\"https://realpython.com/pandas-settingwithcopywarning/#views-and-copies-in-numpy-and-pandas\" target=\"_blank\">continue reading Stojiljković's article.</a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
   ],
   "source": [
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "argv": [
    "/usr/bin/python3",
    "-m",
    "ipykernel",
    "--HistoryManager.enabled=False",
    "--matplotlib=inline",
    "-c",
    "%config InlineBackend.figure_formats = set(['retina'])\nimport matplotlib; matplotlib.rcParams['figure.figsize'] = (12, 7)",
    "-f",
    "{connection_file}"
   ],
   "display_name": "Python 3 (system-wide)",
   "env": {
   },
   "language": "python",
   "metadata": {
    "cocalc": {
     "description": "Python 3 programming language",
     "priority": 100,
     "url": "https://www.python.org/"
    }
   },
   "name": "python3",
   "resource_dir": "/ext/jupyter/kernels/python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}