Skip to content

Commit 74eea1f

Browse files
authored
feat: make Column and Row iterable (#55)
Closes #47.

### Summary of Changes

* Add `__iter__` method to `Column` and `Row` to iterate over the values:
  * Iterating over a `Column` returns the values.
  * Iterating over a `Row` returns the column names, as specified in the [documentation of `__iter__`](https://docs.python.org/3/reference/datamodel.html#object.__iter__).
* Add `__len__` method to `Column` and `Row` to compute their length.
* Change superclasses of exceptions as needed for [`__getitem__`](https://docs.python.org/3/reference/datamodel.html#object.__getitem__):
  * Change superclass of `IndexOutOfBoundsError` to `IndexError`.
  * Change superclass of `UnknownColumnNameError` to `KeyError`.

---------

Co-authored-by: lars-reimann <[email protected]>
1 parent c3fd3b5 commit 74eea1f

File tree

14 files changed

+102
-34
lines changed

14 files changed

+102
-34
lines changed

src/safeds/data/tabular/_column.py

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import typing
44
from numbers import Number
5-
from typing import Any, Callable
5+
from typing import Any, Callable, Iterator
66

77
import numpy as np
88
import pandas as pd
@@ -34,6 +34,10 @@ def name(self) -> str:
3434
"""
3535
return self._name
3636

37+
@property
38+
def statistics(self) -> ColumnStatistics:
39+
return ColumnStatistics(self)
40+
3741
@property
3842
def type(self) -> ColumnType:
3943
"""
@@ -46,9 +50,35 @@ def type(self) -> ColumnType:
4650
"""
4751
return self._type
4852

53+
def __eq__(self, other: object) -> bool:
54+
if not isinstance(other, Column):
55+
return NotImplemented
56+
if self is other:
57+
return True
58+
return self._data.equals(other._data) and self.name == other.name
59+
4960
def __getitem__(self, index: int) -> Any:
5061
return self.get_value(index)
5162

63+
def __hash__(self) -> int:
64+
return hash(self._data)
65+
66+
def __iter__(self) -> Iterator[Any]:
67+
return iter(self._data)
68+
69+
def __len__(self) -> int:
70+
return len(self._data)
71+
72+
def __repr__(self) -> str:
73+
tmp = self._data.to_frame()
74+
tmp.columns = [self.name]
75+
return tmp.__repr__()
76+
77+
def __str__(self) -> str:
78+
tmp = self._data.to_frame()
79+
tmp.columns = [self.name]
80+
return tmp.__str__()
81+
5282
def get_value(self, index: int) -> Any:
5383
"""
5484
Return column value at specified index, starting at 0.
@@ -73,10 +103,6 @@ def get_value(self, index: int) -> Any:
73103

74104
return self._data[index]
75105

76-
@property
77-
def statistics(self) -> ColumnStatistics:
78-
return ColumnStatistics(self)
79-
80106
def count(self) -> int:
81107
"""
82108
Return the number of elements in the column.
@@ -223,26 +249,6 @@ def get_unique_values(self) -> list[typing.Any]:
223249
"""
224250
return list(self._data.unique())
225251

226-
def __eq__(self, other: object) -> bool:
227-
if not isinstance(other, Column):
228-
return NotImplemented
229-
if self is other:
230-
return True
231-
return self._data.equals(other._data) and self.name == other.name
232-
233-
def __hash__(self) -> int:
234-
return hash(self._data)
235-
236-
def __str__(self) -> str:
237-
tmp = self._data.to_frame()
238-
tmp.columns = [self.name]
239-
return tmp.__str__()
240-
241-
def __repr__(self) -> str:
242-
tmp = self._data.to_frame()
243-
tmp.columns = [self.name]
244-
return tmp.__repr__()
245-
246252
def _ipython_display_(self) -> DisplayHandle:
247253
"""
248254
Return a display object for the column to be used in Jupyter Notebooks.
@@ -378,7 +384,6 @@ def sum(self) -> float:
378384
return self._column._data.sum()
379385

380386
def variance(self) -> float:
381-
382387
"""
383388
Return the variance of the column. The column has to be numerical.
384389
@@ -401,7 +406,6 @@ def variance(self) -> float:
401406
return self._column._data.var()
402407

403408
def standard_deviation(self) -> float:
404-
405409
"""
406410
Return the standard deviation of the column. The column has to be numerical.
407411

src/safeds/data/tabular/_row.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ def __init__(self, data: typing.Iterable, schema: TableSchema):
1616
def __getitem__(self, column_name: str) -> Any:
1717
return self.get_value(column_name)
1818

19+
def __iter__(self) -> typing.Iterator[Any]:
20+
return iter(self.get_column_names())
21+
22+
def __len__(self) -> int:
23+
return len(self._data)
24+
1925
def get_value(self, column_name: str) -> Any:
2026
"""
2127
Return the value of a specified column.
@@ -34,6 +40,17 @@ def get_value(self, column_name: str) -> Any:
3440
raise UnknownColumnNameError([column_name])
3541
return self._data[self.schema._get_column_index_by_name(column_name)]
3642

43+
def count(self) -> int:
44+
"""
45+
Return the number of columns in this row.
46+
47+
Returns
48+
-------
49+
count : int
50+
The number of columns.
51+
"""
52+
return len(self._data)
53+
3754
def has_column(self, column_name: str) -> bool:
3855
"""
3956
Return whether the row contains a given column.

src/safeds/exceptions/_data_exceptions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
class UnknownColumnNameError(Exception):
1+
class UnknownColumnNameError(KeyError):
22
"""
33
Exception raised for trying to access an invalid column name.
44
@@ -37,7 +37,7 @@ def __init__(self, column_name: str):
3737
super().__init__(f"Column '{column_name}' already exists.")
3838

3939

40-
class IndexOutOfBoundsError(Exception):
40+
class IndexOutOfBoundsError(IndexError):
4141
"""
4242
Exception raised for trying to access an element by an index that does not exist in the underlying data.
4343

tests/safeds/data/tabular/_column/test_column.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def test_from_columns() -> None:
99
assert column1._type == column2._type
1010

1111

12-
def negative_test_from_columns() -> None:
12+
def test_from_columns_negative() -> None:
1313
column1 = Column(pd.Series([1, 4]), "A")
1414
column2 = Column(pd.Series(["2", "5"]), "B")
1515

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
import pandas as pd
2-
from safeds.data.tabular import Table
1+
from safeds.data.tabular import Column
32

43

54
def test_count_valid() -> None:
6-
table = Table(pd.DataFrame(data={"col1": [1, 2, 3, 4, 5], "col2": [2, 3, 4, 5, 6]}))
7-
assert table.get_column("col1").count() == 5
5+
column = Column([1, 2, 3, 4, 5], "col1")
6+
assert column.count() == 5
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from safeds.data.tabular import Column
2+
3+
4+
def test_iter() -> None:
5+
column = Column([0, "1"], "testColumn")
6+
assert list(column) == [0, "1"]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from safeds.data.tabular import Column
2+
3+
4+
def test_count_valid() -> None:
5+
column = Column([1, 2, 3, 4, 5], "col1")
6+
assert len(column) == 5
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from safeds.data.tabular import Row
2+
from safeds.data.tabular.typing import IntColumnType, StringColumnType, TableSchema
3+
4+
5+
def test_count() -> None:
6+
row = Row(
7+
[0, "1"],
8+
TableSchema(
9+
{"testColumn1": IntColumnType(), "testColumn2": StringColumnType()}
10+
),
11+
)
12+
assert row.count() == 2
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from safeds.data.tabular import Row
2+
from safeds.data.tabular.typing import IntColumnType, StringColumnType, TableSchema
3+
4+
5+
def test_iter() -> None:
6+
row = Row(
7+
[0, "1"],
8+
TableSchema(
9+
{"testColumn1": IntColumnType(), "testColumn2": StringColumnType()}
10+
),
11+
)
12+
assert list(row) == ["testColumn1", "testColumn2"]
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from safeds.data.tabular import Row
2+
from safeds.data.tabular.typing import IntColumnType, StringColumnType, TableSchema
3+
4+
5+
def test_count() -> None:
6+
row = Row(
7+
[0, "1"],
8+
TableSchema(
9+
{"testColumn1": IntColumnType(), "testColumn2": StringColumnType()}
10+
),
11+
)
12+
assert len(row) == 2

0 commit comments

Comments
 (0)