Source code for scalexi.document_loaders.context_loaders
import pandas as pd
[docs]
class ContextExtractor:
    """
    Extract a single named column from a CSV file.

    Two entry points are provided, differing only in the shape of the result:

    * :meth:`from_csv_as_series` returns the column as a ``pandas.Series``.
    * :meth:`from_csv_as_df` returns the column as a one-column ``pandas.DataFrame``.

    Both methods parse the entire CSV file with ``pandas.read_csv`` and then
    select the requested column. The class holds no state.
    """

    @staticmethod
    def _load_frame_with_column(csv_file_path, column_name, encoding):
        """
        Read the CSV file and check that ``column_name`` is one of its columns.

        Shared by both public methods so that file handling and error
        reporting stay identical between them.

        :param csv_file_path: The path to the CSV file.
        :param column_name: The name of the column that must be present.
        :param encoding: The encoding passed to ``pandas.read_csv``.
        :return: The full parsed CSV.
        :rtype: pandas.DataFrame
        :raises FileNotFoundError: If the CSV file does not exist.
        :raises ValueError: If the specified column does not exist in the CSV.
        :raises Exception: For any other error raised while reading the file.
        """
        try:
            frame = pd.read_csv(csv_file_path, encoding=encoding)
        except FileNotFoundError as err:
            # Chain explicitly so the original OS-level error stays attached
            # as __cause__ instead of being an incidental context.
            raise FileNotFoundError(f"The file '{csv_file_path}' was not found.") from err
        except Exception as err:
            # The exception type is deliberately kept as plain Exception for
            # backward compatibility with existing callers; the underlying
            # error (parser, decoding, permission, ...) is available via __cause__.
            raise Exception(f"Error processing the file: {err}") from err

        if column_name not in frame.columns:
            raise ValueError(f"The column '{column_name}' does not exist in the CSV.")
        return frame

    def from_csv_as_series(
        self,
        csv_file_path: str,
        column_name: str = "context",
        encoding: str = "utf-8",
    ) -> pd.Series:
        """
        Read a CSV file and return the specified column as a pandas Series.

        :param csv_file_path: The path to the CSV file.
        :type csv_file_path: str
        :param column_name: The name of the column to extract. Default is "context".
        :type column_name: str, optional
        :param encoding: The encoding of the CSV file. Default is "utf-8".
        :type encoding: str, optional
        :return: The specified column as a pandas Series.
        :rtype: pandas.Series
        :raises FileNotFoundError: If the CSV file does not exist.
        :raises ValueError: If the specified column does not exist in the CSV.
        :raises Exception: For any other error raised while reading the file;
            the original error is attached as ``__cause__``.

        :example:

        ::

            >>> extractor = ContextExtractor()
            >>> series = extractor.from_csv_as_series("data.csv", "context")
        """
        frame = self._load_frame_with_column(csv_file_path, column_name, encoding)
        return frame[column_name]

    def from_csv_as_df(
        self,
        csv_file_path: str,
        column_name: str = "context",
        encoding: str = "utf-8",
    ) -> pd.DataFrame:
        """
        Read a CSV file and return the specified column as a pandas DataFrame.

        The result is a DataFrame with exactly one column, ``column_name``.

        :param csv_file_path: The path to the CSV file.
        :type csv_file_path: str
        :param column_name: The name of the column to extract. Default is "context".
        :type column_name: str, optional
        :param encoding: The encoding of the CSV file. Default is "utf-8".
        :type encoding: str, optional
        :return: The specified column as a one-column pandas DataFrame.
        :rtype: pandas.DataFrame
        :raises FileNotFoundError: If the CSV file does not exist.
        :raises ValueError: If the specified column does not exist in the CSV.
        :raises Exception: For any other error raised while reading the file;
            the original error is attached as ``__cause__``.

        :example:

        ::

            >>> extractor = ContextExtractor()
            >>> dataframe = extractor.from_csv_as_df("data.csv", "context")
        """
        frame = self._load_frame_with_column(csv_file_path, column_name, encoding)
        # Double brackets select a list of columns, which keeps the result 2-D.
        return frame[[column_name]]