Source code for backtrader.feeds.csvgeneric

#!/usr/bin/env python
"""Generic CSV Data Feed Module - CSV file parsing.

This module provides the GenericCSVData feed for parsing CSV files
with customizable column mappings for backtesting.

Classes:
    GenericCSVData: Parses CSV files with configurable column mappings.

Example:
    >>> data = bt.feeds.GenericCSVData(
    ...     dataname='data.csv',
    ...     datetime=0,
    ...     open=1,
    ...     high=2,
    ...     low=3,
    ...     close=4,
    ...     volume=5
    ... )
    >>> cerebro.adddata(data)
"""

from datetime import datetime, timezone

from .. import feed
from ..dataseries import TimeFrame
from ..utils import date2num
from ..utils.py3 import integer_types, string_types

# ``datetime.UTC`` only exists on Python 3.11+; it is an alias of
# ``timezone.utc``, which is also available on earlier versions, so the
# portable spelling is bound to ``UTC`` here.
UTC = timezone.utc



[docs]
class GenericCSVData(feed.CSVDataBase):
    """Parses a CSV file according to the order and field presence defined by
    the parameters.

    Specific parameters (or specific meaning):

      - ``dataname``: The filename to parse or a file-like object

      - The lines parameters (datetime, open, high ...) take numeric values:
        the zero-based index of the column holding that field in each row

        A value of -1 (or ``None``) indicates absence of that field in the
        CSV source; the line is then filled with ``nullvalue``

      - If ``time`` is present (parameter time >=0), the source contains
        separated fields for date and time, which will be combined

        The ``time`` column is only combined when ``dtformat`` is a string

      - ``nullvalue``

        Value that will be used if a value which should be there is missing
        (the CSV field is empty) or if the field is absent from the source

      - ``dtformat``: Format used to parse the datetime CSV field. See the
        python strptime/strftime documentation for the format.

        If a numeric value is specified, it will be interpreted as follows

          - ``1``: The value is a Unix timestamp of a type ``int`` representing
            the number of seconds since Jan 1st, 1970

          - ``2``: The value is a Unix timestamp of a type ``float``

        In both numeric cases the timestamp is interpreted as UTC and turned
        into a naive ``datetime.datetime``

        If a **callable** is passed

          - It will accept a string and return a `datetime.datetime` python
            instance

      - ``tmformat``: Format used to parse the time CSV field if "present"
        (the default for the "time" CSV field is not to be present)

    """

    # Common parameters for csv data: parsing options followed by the column
    # index of each line in a CSV row
    params = (
        ("nullvalue", float("NaN")),
        ("dtformat", "%Y-%m-%d %H:%M:%S"),
        ("tmformat", "%H:%M:%S"),
        ("datetime", 0),
        ("time", -1),
        ("open", 1),
        ("high", 2),
        ("low", 3),
        ("close", 4),
        ("volume", 5),
        ("openinterest", 6),
    )

    def __init__(self, *args, **kwargs):
        """Initialize the Generic CSV data feed.

        Args:
            *args: Positional arguments forwarded to the base data feed.
            **kwargs: Keyword arguments forwarded to the base data feed.
        """
        super().__init__(*args, **kwargs)
        # Both are (re)computed in start() from the ``dtformat`` parameter
        self._dtconvert = None  # callable: CSV datetime field -> datetime
        self._dtstr = None  # True when ``dtformat`` is a strptime format

    def start(self):
        """Start the feed and select the datetime conversion strategy.

        The ``dtformat`` parameter decides how ``_loadline`` parses the
        datetime column:

          - string: parsed with ``datetime.strptime`` (``_dtstr`` is True)
          - integer ``1`` / ``2``: Unix timestamp read as ``int`` / ``float``
            and converted to a naive UTC ``datetime``
          - any non-string, non-integer value: assumed to be a callable and
            used as the converter as is

        Any other integer value leaves ``_dtconvert`` untouched.
        """
        super().start()

        dtformat = self.p.dtformat
        self._dtstr = False
        if isinstance(dtformat, string_types):
            self._dtstr = True
        elif isinstance(dtformat, integer_types):
            idt = int(dtformat)
            # The converters return *naive* UTC datetimes (what the deprecated
            # ``datetime.utcfromtimestamp`` produced). An aware datetime would
            # make a pytz-style ``_tzinput.localize`` call in ``_loadline``
            # fail, and the strptime path yields naive values as well.
            if idt == 1:
                self._dtconvert = lambda x: datetime.fromtimestamp(
                    int(x), UTC
                ).replace(tzinfo=None)
            elif idt == 2:
                self._dtconvert = lambda x: datetime.fromtimestamp(
                    float(x), UTC
                ).replace(tzinfo=None)
        else:  # assume callable
            self._dtconvert = dtformat

    def _loadline(self, linetokens):
        """Fill the current bar of every line from one tokenized CSV row.

        Args:
            linetokens: Sequence with the fields of one CSV row, indexed by
                the column parameters (``datetime``, ``open`` ...).

        Returns:
            bool: Always ``True``.
        """
        # --- datetime needs special treatment ---------------------------
        dtfield = linetokens[self.p.datetime]
        if self._dtstr:
            dtformat = self.p.dtformat
            if self.p.time >= 0:
                # add time value and format if it's in a separate field
                dtfield += "T" + linetokens[self.p.time]
                dtformat += "T" + self.p.tmformat
            dt = datetime.strptime(dtfield, dtformat)
        else:
            # numeric timestamp or user callable selected in start()
            dt = self._dtconvert(dtfield)

        if self.p.timeframe >= TimeFrame.Days:
            # check if the expected end of session is larger than parsed
            if self._tzinput:
                dtin = self._tzinput.localize(dt)  # pytz compatible-ized
            else:
                dtin = dt

            dtnum = date2num(dtin)  # utc'ize

            # end of session on the same calendar date as the parsed value
            dteos = datetime.combine(dt.date(), self.p.sessionend)
            dteosnum = self.date2num(dteos)  # utc'ize

            if dteosnum > dtnum:
                # the session end is later: stamp the bar with it
                self.lines.datetime[0] = dteosnum
            else:
                # Avoid reconversion if already converted dtin == dt
                self.lines.datetime[0] = date2num(dt) if self._tzinput else dtnum
        else:
            # intraday timeframes use the parsed value directly
            self.lines.datetime[0] = date2num(dt)

        # --- remaining fields -------------------------------------------
        # The (column index, line, nullvalue) triples are resolved once on the
        # first row and reused, so later rows avoid the getattr lookups.
        field_cache = getattr(self, "_field_cache", None)
        if field_cache is None:
            p = self.p
            lines = self.lines
            nullvalue = p.nullvalue
            field_cache = [
                (getattr(p, linefield), getattr(lines, linefield), nullvalue)
                for linefield in self.getlinealiases()
                if linefield != "datetime"
            ]
            self._field_cache = field_cache
            self._nullvalue = nullvalue

        nullvalue = self._nullvalue
        for csvidx, line, _ in field_cache:
            if csvidx is None or csvidx < 0:
                # the field is not present in the source
                csvfield = nullvalue
            else:
                csvfield = linetokens[csvidx]
                if csvfield == "":
                    # present but empty
                    csvfield = nullvalue
            line[0] = float(csvfield)

        return True




[docs]
class GenericCSV(feed.CSVFeedBase):
    """Generic CSV feed class.

    Feed-level wrapper for ``GenericCSVData``: the only thing it defines is
    ``DataCls``, which points at ``GenericCSVData``.

    NOTE(review): how ``feed.CSVFeedBase`` consumes ``DataCls`` is not visible
    in this module; confirm against the base class.
    """

    # Data class associated with this feed
    DataCls = GenericCSVData