diff --git a/.kokoro/release-nightly.sh b/.kokoro/release-nightly.sh
index 5624df3b8d..7da0881bbe 100755
--- a/.kokoro/release-nightly.sh
+++ b/.kokoro/release-nightly.sh
@@ -106,6 +106,7 @@ for gcs_path in gs://vertex_sdk_private_releases/bigframe/ \
# write access to
COVERAGE_TABLE=bigframes-metrics.coverage_report.bigframes_coverage_nightly
python3.10 scripts/publish_api_coverage.py \
+ bigquery \
--bigframes_version=$BIGFRAMES_VERSION \
--release_version=$RELEASE_VERSION \
--bigquery_table=$COVERAGE_TABLE
diff --git a/docs/index.rst b/docs/index.rst
index d239ea3a78..b17ac7cbd9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,6 +7,7 @@ API reference
:maxdepth: 3
reference/index
+ supported_pandas_apis
Changelog
---------
diff --git a/docs/supported_pandas_apis.rst b/docs/supported_pandas_apis.rst
new file mode 100644
index 0000000000..f4b57f05d1
--- /dev/null
+++ b/docs/supported_pandas_apis.rst
@@ -0,0 +1,62 @@
+Supported pandas APIs
+=====================
+
+The following tables show the pandas APIs that have been implemented, fully or
+partially, in BigQuery DataFrames. APIs that are not yet implemented are omitted.
+
+* 'Y' means it implements all parameters.
+* 'P' means it implements only some parameters.
+
+DataFrame
+---------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_dataframe.html
+
+DataFrameGroupBy
+----------------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_dataframegroupby.html
+
+Index
+-----
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_index.html
+
+pandas module
+-------------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_pandas.html
+
+Series
+------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_series.html
+
+Series.dt methods
+-----------------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_datetimemethods.html
+
+Series.str methods
+------------------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_stringmethods.html
+
+SeriesGroupBy
+-------------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_seriesgroupby.html
+
+Window
+------
+
+.. raw:: html
+ :file: supported_pandas_apis/bf_window.html
diff --git a/docs/supported_pandas_apis/.gitignore b/docs/supported_pandas_apis/.gitignore
new file mode 100644
index 0000000000..2d19fc766d
--- /dev/null
+++ b/docs/supported_pandas_apis/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml
index c07e6141f1..57b0522d04 100644
--- a/docs/templates/toc.yml
+++ b/docs/templates/toc.yml
@@ -72,6 +72,8 @@
name: Series
- name: Window
uid: bigframes.core.window.Window
+ - href: supported_pandas_apis.html
+ name: Supported pandas APIs
name: bigframes.pandas
- items:
- items:
diff --git a/noxfile.py b/noxfile.py
index a5e77964f1..4ac3a81723 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -467,6 +467,12 @@ def docs(session):
)
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
+
+ session.run(
+ "python",
+ "scripts/publish_api_coverage.py",
+ "docs",
+ )
session.run(
"sphinx-build",
"-W", # warnings as errors
@@ -503,6 +509,12 @@ def docfx(session):
)
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
+
+ session.run(
+ "python",
+ "scripts/publish_api_coverage.py",
+ "docs",
+ )
session.run(
"sphinx-build",
"-T", # show full traceback on exception
diff --git a/scripts/publish_api_coverage.py b/scripts/publish_api_coverage.py
index 856307e440..4a35ade9ef 100644
--- a/scripts/publish_api_coverage.py
+++ b/scripts/publish_api_coverage.py
@@ -17,27 +17,110 @@
import argparse
import inspect
+import pathlib
+import sys
import pandas as pd
+import pandas.core.groupby
+import pandas.core.indexes.accessors
+import pandas.core.strings.accessor
+import pandas.core.window.rolling
+import bigframes
+import bigframes.core.groupby
+import bigframes.core.window
+import bigframes.operations.datetimes
+import bigframes.operations.strings
import bigframes.pandas as bpd
+REPO_ROOT = pathlib.Path(__file__).parent.parent
+
+# Common root of every documentation URL built below.
+_REFERENCE_ROOT = "https://cloud.google.com/python/docs/reference/bigframes/latest/"
+
+# Dotted path of the documented object behind each API prefix.
+_DOCUMENTED_OBJECTS = {
+    "pandas": "bigframes.pandas",
+    "dataframe": "bigframes.dataframe.DataFrame",
+    "dataframegroupby": "bigframes.core.groupby.DataFrameGroupBy",
+    "series": "bigframes.series.Series",
+    "seriesgroupby": "bigframes.core.groupby.SeriesGroupBy",
+    "datetimemethods": "bigframes.operations.datetimes.DatetimeMethods",
+    "stringmethods": "bigframes.operations.strings.StringMethods",
+    "window": "bigframes.core.window.Window",
+    # TODO: Index not documented.
+}
+
+# URL prefix per API prefix. Each value has the form
+# <root><dotted path>#<dotted path with "." replaced by "_">_
+# API prefixes without an entry (currently "index") have no URL.
+URL_PREFIX = {
+    prefix: f"{_REFERENCE_ROOT}{dotted}#{dotted.replace('.', '_')}_"
+    for prefix, dotted in _DOCUMENTED_OBJECTS.items()
+}
+
+
+# (api_prefix, pandas object, bigframes object) triples. The members of each
+# pandas object are looked up on the paired bigframes object to build the
+# coverage report; api_prefix namespaces the resulting "api" values.
+PANDAS_TARGETS = [
+    ("pandas", pd, bpd),
+    ("dataframe", pd.DataFrame, bpd.DataFrame),
+    (
+        "dataframegroupby",
+        pandas.core.groupby.DataFrameGroupBy,
+        bigframes.core.groupby.DataFrameGroupBy,
+    ),
+    ("series", pd.Series, bpd.Series),
+    (
+        "seriesgroupby",
+        # Must be the SeriesGroupBy classes: pairing the DataFrameGroupBy
+        # classes here would just repeat the "dataframegroupby" comparison.
+        pandas.core.groupby.SeriesGroupBy,
+        bigframes.core.groupby.SeriesGroupBy,
+    ),
+    (
+        "datetimemethods",
+        pandas.core.indexes.accessors.CombinedDatetimelikeProperties,
+        bigframes.operations.datetimes.DatetimeMethods,
+    ),
+    (
+        "stringmethods",
+        pandas.core.strings.accessor.StringMethods,
+        bigframes.operations.strings.StringMethods,
+    ),
+    (
+        "window",
+        pandas.core.window.rolling.Rolling,
+        bigframes.core.window.Window,
+    ),
+    ("index", pd.Index, bpd.Index),
+]
+
+
+def names_from_signature(signature):
+    """Return the parameter names of ``signature`` as a frozenset.
+
+    ``signature`` is the object returned by ``inspect.signature``; see
+    https://docs.python.org/3/library/inspect.html#inspect.signature
+    """
+    parameter_names = signature.parameters.keys()
+    return frozenset(parameter_names)
+
+
+def calculate_missing_parameters(bigframes_function, target_function):
+    """Return the parameter names in ``target_function``'s signature that are
+    absent from ``bigframes_function``'s signature, as a frozenset."""
+    provided = names_from_signature(inspect.signature(bigframes_function))
+    expected = names_from_signature(inspect.signature(target_function))
+    return expected.difference(provided)
+
def generate_pandas_api_coverage():
"""Inspect all our pandas objects, and compare with the real pandas objects, to see
which methods we implement. For each, generate a regex that can be used to check if
its present in a notebook"""
- header = ["api", "pattern", "kind", "is_in_bigframes"]
+ header = ["api", "pattern", "kind", "is_in_bigframes", "missing_parameters"]
api_patterns = []
- targets = [
- ("pandas", pd, bpd),
- ("dataframe", pd.DataFrame, bpd.DataFrame),
- ("series", pd.Series, bpd.Series),
- ("index", pd.Index, bpd.Index),
- ]
indexers = ["loc", "iloc", "iat", "ix", "at"]
- for name, pandas_obj, bigframes_obj in targets:
+ for name, pandas_obj, bigframes_obj in PANDAS_TARGETS:
for member in dir(pandas_obj):
+ missing_parameters = ""
+
# skip private functions and properties
if member[0] == "_" and member[1] != "_":
continue
@@ -50,6 +133,17 @@ def generate_pandas_api_coverage():
# Function, match .member(
token = f"\\.{member}\\("
token_type = "function"
+
+ if hasattr(bigframes_obj, member):
+ bigframes_function = getattr(bigframes_obj, member)
+ pandas_function = getattr(pandas_obj, member)
+ missing_parameters = ", ".join(
+ sorted(
+ calculate_missing_parameters(
+ bigframes_function, pandas_function
+ )
+ )
+ )
elif member in indexers:
# Indexer, match .indexer[
token = f"\\.{member}\\["
@@ -62,7 +156,13 @@ def generate_pandas_api_coverage():
is_in_bigframes = hasattr(bigframes_obj, member)
api_patterns.append(
- [f"{name}.{member}", token, token_type, is_in_bigframes]
+ [
+ f"{name}.{member}",
+ token,
+ token_type,
+ is_in_bigframes,
+ missing_parameters,
+ ]
)
return pd.DataFrame(api_patterns, columns=header)
@@ -165,14 +265,112 @@ def build_api_coverage_table(bigframes_version: str, release_version: str):
return combined_df.infer_objects().convert_dtypes()
+def format_api(api_names, is_in_bigframes, api_prefix):
+    """Render API names as HTML snippets for the coverage tables.
+
+    Args:
+        api_names: string Series of names of the form ``<api_prefix>.<member>``.
+        is_in_bigframes: boolean Series aligned with ``api_names``; True where
+            the member exists in BigQuery DataFrames.
+        api_prefix: target key (e.g. "dataframe"). It is stripped from each
+            name and used to look up the documentation URL prefix.
+
+    Returns:
+        Series of HTML snippets: the bare member name wrapped in ``<code>``,
+        and, where ``is_in_bigframes`` is True and a URL prefix is known for
+        ``api_prefix``, additionally wrapped in a link to
+        ``<url prefix><member>``.
+    """
+    api_names = api_names.str.slice(start=len(f"{api_prefix}."))
+    formatted = "<code>" + api_names + "</code>"
+    url_prefix = URL_PREFIX.get(api_prefix)
+    if url_prefix is None:
+        # No URL prefix registered for this target, so nothing can be linked.
+        return formatted
+
+    linked = '<a href="' + url_prefix + api_names + '">' + formatted + "</a>"
+    # Only members present in bigframes get the linked form.
+    return formatted.mask(is_in_bigframes, linked)
+
+
+def generate_api_coverage(df, api_prefix):
+    """Build the coverage table for the APIs under ``api_prefix``.
+
+    Selects the rows of ``df`` whose ``api`` starts with ``"<api_prefix>."`` and
+    returns a DataFrame with the columns "API", "Implemented" and
+    "Missing parameters". "Implemented" is "Y" when the API is in bigframes
+    with no missing parameters, "P" when it is in bigframes with some missing
+    parameters, and "N" when it is not in bigframes.
+    """
+    target_rows = df.loc[df["api"].str.startswith(f"{api_prefix}.")]
+    in_bigframes = target_rows["is_in_bigframes"]
+    missing = target_rows["missing_parameters"]
+
+    status_masks = (
+        ("Y", (missing.str.len() == 0) & in_bigframes),
+        ("P", (missing.str.len() != 0) & in_bigframes),
+        ("N", ~in_bigframes),
+    )
+    coverage = pd.DataFrame(
+        {
+            "API": format_api(target_rows["api"], in_bigframes, api_prefix),
+            "Implemented": "",
+            "Missing parameters": missing,
+        }
+    )
+    for status, mask in status_masks:
+        coverage.loc[mask, "Implemented"] = status
+    return coverage
+
+
+def generate_api_coverage_doc(df, api_prefix):
+    """Write the HTML coverage table for ``api_prefix`` into the docs tree.
+
+    Rows whose status is "N" are dropped, so only "Y" and "P" APIs are listed.
+    The table is written to
+    ``<repo root>/docs/supported_pandas_apis/bf_<api_prefix>.html``.
+    """
+    dataframe_table = generate_api_coverage(df, api_prefix)
+    # Copy the filtered rows so the column assignment below modifies an
+    # independent frame rather than a slice (avoids SettingWithCopyWarning).
+    dataframe_table = dataframe_table.loc[
+        ~(dataframe_table["Implemented"] == "N")
+    ].copy()
+    # NOTE(review): this mapping is an identity as written - confirm whether
+    # per-status markup was intended here.
+    dataframe_table["Implemented"] = dataframe_table["Implemented"].map(
+        {
+            "Y": "Y",
+            "P": "P",
+        }
+    )
+
+    output_path = (
+        REPO_ROOT / "docs" / "supported_pandas_apis" / f"bf_{api_prefix}.html"
+    )
+    # Pin the encoding instead of relying on the platform default.
+    with open(output_path, "w", encoding="utf-8") as html_file:
+        # escape=False keeps the markup produced for the "API" column intact.
+        dataframe_table.to_html(
+            html_file, index=False, header=True, escape=False, border=0, col_space="8em"
+        )
+
+
+def generate_api_coverage_docs(df):
+    """Write one HTML coverage table for each entry of ``PANDAS_TARGETS``."""
+    for api_prefix, *_ in PANDAS_TARGETS:
+        generate_api_coverage_doc(df, api_prefix)
+
+
+def print_api_coverage_summary(df, api_prefix):
+    """Print the per-status API counts and the API total for ``api_prefix``."""
+    coverage = generate_api_coverage(df, api_prefix)
+
+    print(api_prefix)
+    counts_by_status = coverage[["Implemented", "API"]].groupby(["Implemented"]).count()
+    print(counts_by_status)
+    total_apis = coverage.shape[0]
+    print(f"{api_prefix} APIs: {total_apis}\n")
+
+
+def print_api_coverage_summaries(df):
+    """Print a summary for each entry of ``PANDAS_TARGETS``, followed by the
+    overall Y / P / N totals across every row of ``df``."""
+    for api_prefix, *_ in PANDAS_TARGETS:
+        print_api_coverage_summary(df, api_prefix)
+
+    print(f"\nAll APIs: {len(df.index)}")
+
+    # Y: in bigframes with no missing parameters.
+    complete = (df["missing_parameters"].str.len() == 0) & df["is_in_bigframes"]
+    print(f"Y: {complete.sum()}")
+
+    # P: in bigframes but with at least one missing parameter.
+    partial = (df["missing_parameters"].str.len() != 0) & df["is_in_bigframes"]
+    print(f"P: {partial.sum()}")
+
+    # N: not in bigframes at all.
+    absent = ~df["is_in_bigframes"]
+    print(f"N: {absent.sum()}")
+
+
def main():
+ """Build the API coverage table and emit it according to ``output_type``.
+
+ ``bigquery`` appends the table to the table named by
+ ``--bigquery_table_name``, ``docs`` writes the HTML coverage tables, and
+ ``summary`` prints coverage counts. Any other value prints a message and
+ exits with status 1.
+ """
parser = argparse.ArgumentParser()
- parser.add_argument("--bigframes_version")
- parser.add_argument("--release_version")
+ parser.add_argument("output_type")
+ parser.add_argument("--bigframes_version", default=bigframes.__version__)
+ parser.add_argument("--release_version", default="")
parser.add_argument("--bigquery_table_name")
args = parser.parse_args()
df = build_api_coverage_table(args.bigframes_version, args.release_version)
- df.to_gbq(args.bigquery_table_name, if_exists="append")
+
+ # Dispatch on the positional output_type argument.
+ if args.output_type == "bigquery":
+ df.to_gbq(args.bigquery_table_name, if_exists="append")
+ elif args.output_type == "docs":
+ generate_api_coverage_docs(df)
+ elif args.output_type == "summary":
+ print_api_coverage_summaries(df)
+ else:
+ print(f"Unexpected output_type {repr(args.output_type)}")
+ sys.exit(1)
if __name__ == "__main__":
diff --git a/scripts/test_publish_api_coverage.py b/scripts/test_publish_api_coverage.py
index 96b2d1bb48..061cc1c25c 100644
--- a/scripts/test_publish_api_coverage.py
+++ b/scripts/test_publish_api_coverage.py
@@ -27,6 +27,7 @@ def test_api_coverage_produces_expected_schema():
"string",
"boolean",
"string",
+ "string",
"datetime64[ns]",
"string",
"string",
@@ -36,6 +37,7 @@ def test_api_coverage_produces_expected_schema():
"pattern",
"kind",
"is_in_bigframes",
+ "missing_parameters",
"module",
"timestamp",
"bigframes_version",