Source code for scalexi.document_loaders.context_loaders

import pandas as pd

[docs] class ContextExtractor: """ A class to extract specific columns from a CSV file as pandas Series or DataFrame. This class provides methods to read a specified column from a CSV file and return it as either a pandas Series or DataFrame. It's useful for data processing tasks where only specific column data is required from a larger dataset. :method from_csv_as_series: Reads a CSV file and returns the specified column as a pandas Series. :type from_csv_as_series: method :method from_csv_as_df: Reads a CSV file and returns the specified column as a pandas DataFrame. :type from_csv_as_df: method """
[docs] def from_csv_as_series(self, csv_file_path, column_name="context", encoding="utf-8"): """ Reads a CSV file and returns the specified column as a pandas Series. This method is designed to extract a single column from a CSV file and present it as a pandas Series, which can be useful for further data analysis or processing. :param csv_file_path: The path to the CSV file. :type csv_file_path: str :param column_name: The name of the column to extract. Default is "context". :type column_name: str, optional :param encoding: The encoding of the CSV file. Default is "utf-8". :type encoding: str, optional :return: The specified column as a pandas Series. :rtype: pandas.Series :raises FileNotFoundError: If the CSV file does not exist. :raises ValueError: If the specified column does not exist in the CSV. :raises Exception: For any other exceptions that may occur. :example: :: >>> extractor = ContextExtractor() >>> series = extractor.from_csv_as_series("data.csv", "context") """ try: df = pd.read_csv(csv_file_path, encoding=encoding) except FileNotFoundError: raise FileNotFoundError(f"The file '{csv_file_path}' was not found.") except Exception as e: raise Exception(f"Error processing the file: {e}") if column_name in df.columns: return df[column_name] else: raise ValueError(f"The column '{column_name}' does not exist in the CSV.")
[docs] def from_csv_as_df(self, csv_file_path, column_name="context", encoding="utf-8"): """ Reads a CSV file and returns the specified column as a pandas DataFrame. This method extracts a single column from a CSV file and presents it as a pandas DataFrame. It's particularly useful when only one column of data is needed for analysis or processing. :param csv_file_path: The path to the CSV file. :type csv_file_path: str :param column_name: The name of the column to extract. Default is "context". :type column_name: str, optional :param encoding: The encoding of the CSV file. Default is "utf-8". :type encoding: str, optional :return: The specified column as a pandas DataFrame. :rtype: pandas.DataFrame :raises FileNotFoundError: If the CSV file does not exist. :raises ValueError: If the specified column does not exist in the CSV. :raises Exception: For any other exceptions that may occur. :example: :: >>> extractor = ContextExtractor() >>> dataframe = extractor.from_csv_as_df("data.csv", "context") """ try: df = pd.read_csv(csv_file_path, encoding=encoding) except FileNotFoundError: raise FileNotFoundError(f"The file '{csv_file_path}' was not found.") except Exception as e: raise Exception(f"Error processing the file: {e}") if column_name in df.columns: return df[[column_name]] else: raise ValueError(f"The column '{column_name}' does not exist in the CSV.")