Merge pull request #203 from bkemper24/main

bkemper24 · web-flow · commit 47ca0c844efa · 2026-03-20T07:22:25.000-04:00
updates to tests for Pandas 3 changes
diff --git a/swat/tests/cas/test_bygroups.py b/swat/tests/cas/test_bygroups.py
@@ -112,8 +112,17 @@ def assertTablesEqual(self, a, b, fillna=-999999, sortby=SORT_KEYS,
             a = a.sort_values(sortby)
             b = b.sort_values(sortby)
         self.assertEqual(list(a.columns), list(b.columns))
-        a = a.fillna(value=fillna)
-        b = b.fillna(value=fillna)
+
+        if pd_version >= (2, 2, 0) and pd_version < (3, 0, 0):
+            # Opt in to the future behavior to avoid the pandas 2.2/2.3 FutureWarning:
+            #  Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated
+            with pd.option_context('future.no_silent_downcasting', True):
+                a = a.fillna(value=fillna)
+                b = b.fillna(value=fillna)
+        else:
+            a = a.fillna(value=fillna)
+            b = b.fillna(value=fillna)
+
         for lista, listb in zip(list(a.to_records(index=include_index)),
                                 list(b.to_records(index=include_index))):
             lista = list(lista)
@@ -701,7 +710,8 @@ def test_column_max(self):
                                sortby=['Origin', 'EngineSize'])
 
     @unittest.skipIf(pd_version < (0, 16, 0), 'Need newer version of Pandas')
-    @unittest.skipIf(pd_version >= (1, 0, 0), 'Raises AssertionError in Pandas 1')
+    @unittest.skipIf(pd_version >= (1, 0, 0) and pd_version < (2, 0, 0),
+                     'Raises AssertionError in Pandas 1')
     def test_max(self):
         df = self.get_cars_df().sort_values(SORT_KEYS)
         tbl = self.table.sort_values(SORT_KEYS)
diff --git a/swat/tests/cas/test_datamsg.py b/swat/tests/cas/test_datamsg.py
@@ -350,10 +350,13 @@ def test_text(self):
         self.assertTablesEqual(f, s, sortby=SORT_KEYS)
 
     def test_json(self):
+        import io
         df = self.table.to_frame()
         jsondf = df.to_json()
 
-        dmh = swat.datamsghandlers.JSON(jsondf)
+        # Pandas 3 no longer supports passing a literal JSON text string.
+        # You must pass a file path or an object with a read method.
+        dmh = swat.datamsghandlers.JSON(io.StringIO(jsondf))
 
         tbl = self.s.addtable(table='cars', **dmh.args.addtable).casTable
 
diff --git a/swat/tests/cas/test_table.py b/swat/tests/cas/test_table.py
@@ -1162,13 +1162,16 @@ def test_describe(self):
         self.assertEqual(desc.loc['freq'].tolist(), dfdesc.loc['freq'].tolist())
 
         # Percentiles
+        # Pandas < 3 always includes percentile 0.5, even if you don't ask for it.
+        # Starting with Pandas 3, 0.5 is included only when requested, so request it
+        # explicitly here; CASTable always includes 0.5, regardless of pandas version.
         desc = self.table.describe(percentiles=[0.3, 0.7])
-        dfdesc = df.describe(percentiles=[0.3, 0.7])
+        dfdesc = df.describe(percentiles=[0.3, 0.5, 0.7])
         self.assertEqual(desc.index.tolist(), dfdesc.index.tolist())
         self.assertEqual(desc.columns.tolist(), dfdesc.columns.tolist())
 
         desc = self.table.describe(percentiles=0.4)
-        dfdesc = df.describe(percentiles=[0.4])
+        dfdesc = df.describe(percentiles=[0.4, 0.5])
         self.assertEqual(desc.index.tolist(), dfdesc.index.tolist())
         self.assertEqual(desc.columns.tolist(), dfdesc.columns.tolist())
 
@@ -1536,17 +1539,39 @@ def test_mode(self):
         tblgrp = tbl[['Make', 'Type']].groupby(['Make'])
 
         # TODO: Pandas mode sets columns with all unique values to NaN
-        self.assertEqual(
-            dfgrp.get_group('Acura').mode()[['Type']].to_csv(index=False),
-            tblgrp.mode().loc['Acura', ['Type']].dropna(how='all').to_csv(index=False))
+        if pd_version >= (2, 2, 0):
+            # Syntax change in pandas 3 (FutureWarning in pandas 2.2+):
+            # when grouping by a length-1 list-like such as ['Make'],
+            # get_group must be passed a length-1 tuple
+            # rather than a scalar key.
+            self.assertEqual(
+                dfgrp.get_group(('Acura',)).mode()[['Type']].to_csv(index=False),
+                tblgrp.mode().loc['Acura', ['Type']].dropna(how='all')
+                .to_csv(index=False))
+        else:
+            self.assertEqual(
+                dfgrp.get_group('Acura').mode()[['Type']].to_csv(index=False),
+                tblgrp.mode().loc['Acura', ['Type']].dropna(how='all')
+                .to_csv(index=False))
 
         dfgrp = df[['Cylinders', 'MPG_City']].groupby(['Cylinders'])
         tblgrp = tbl[['Cylinders', 'MPG_City']].groupby(['Cylinders'])
 
         # TODO: Pandas mode sets columns with all unique values to NaN
-        self.assertEqual(
-            dfgrp.get_group(6.0).mode()[['MPG_City']].to_csv(index=False),
-            tblgrp.mode().loc[6.0, ['MPG_City']].dropna(how='all').to_csv(index=False))
+        if pd_version >= (2, 2, 0):
+            # Syntax change in pandas 3 (FutureWarning in pandas 2.2+):
+            # when grouping by a length-1 list-like such as ['Cylinders'],
+            # get_group must be passed a length-1 tuple
+            # rather than a scalar key.
+            self.assertEqual(
+                dfgrp.get_group((6.0,)).mode()[['MPG_City']].to_csv(index=False),
+                tblgrp.mode().loc[6.0, ['MPG_City']].dropna(how='all')
+                .to_csv(index=False))
+        else:
+            self.assertEqual(
+                dfgrp.get_group(6.0).mode()[['MPG_City']].to_csv(index=False),
+                tblgrp.mode().loc[6.0, ['MPG_City']].dropna(how='all')
+                .to_csv(index=False))
 
     def test_median(self):
         df = self.get_cars_df()
@@ -4652,7 +4677,9 @@ def test_to_html(self):
 
         html = tbl.to_html(index=False)
 
-        df2 = pd.read_html(html)[0]
+        # Starting with Pandas 3 you can no longer
+        # pass an html string to pandas read_html.
+        df2 = pd.read_html(io.StringIO(html))[0]
 
         df['Model'] = df['Model'].str.strip()
 
diff --git a/swat/utils/testing.py b/swat/utils/testing.py
@@ -37,6 +37,8 @@
 
 RE_TYPE = type(re.compile(r''))
 
+pd_version = tuple([int(x) for x in re.match(r'^(\d+)\.(\d+)\.(\d+)',
+                                             pd.__version__).groups()])
 
 warnings.filterwarnings('ignore', category=OptionWarning)
 warnings.filterwarnings('ignore', category=RuntimeWarning)
@@ -93,8 +95,17 @@ def assertTablesEqual(self, a, b, fillna=-999999, sortby=None, precision=None):
             a = a.sort_values(sortby, na_position='first')
             b = b.sort_values(sortby, na_position='first')
         self.assertEqual(list(a.columns), list(b.columns))
-        a = a.fillna(value=fillna)
-        b = b.fillna(value=fillna)
+
+        if pd_version >= (2, 2, 0) and pd_version < (3, 0, 0):
+            # Opt in to the future behavior to avoid the pandas 2.2/2.3 FutureWarning:
+            # Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated
+            with pd.option_context('future.no_silent_downcasting', True):
+                a = a.fillna(value=fillna)
+                b = b.fillna(value=fillna)
+        else:
+            a = a.fillna(value=fillna)
+            b = b.fillna(value=fillna)
+
         if precision is not None:
             a = a.round(decimals=precision)
             b = b.round(decimals=precision)
@@ -108,8 +119,17 @@ def assertColsEqual(self, a, b, fillna=-999999, sort=False, precision=None):
             a = a.to_series()
         if hasattr(b, 'to_series'):
             b = b.to_series()
-        a = a.fillna(value=fillna)
-        b = b.fillna(value=fillna)
+
+        if pd_version >= (2, 2, 0) and pd_version < (3, 0, 0):
+            # Opt in to the future behavior to avoid the pandas 2.2/2.3 FutureWarning:
+            # Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated
+            with pd.option_context('future.no_silent_downcasting', True):
+                a = a.fillna(value=fillna)
+                b = b.fillna(value=fillna)
+        else:
+            a = a.fillna(value=fillna)
+            b = b.fillna(value=fillna)
+
         if precision is not None:
             a = a.round(decimals=precision)
             b = b.round(decimals=precision)