# Source code for backtrader.feeds.yahoo

#!/usr/bin/env python
"""Yahoo Finance Data Feed Module - Yahoo CSV data parsing.

This module provides the YahooFinanceCSVData feed for parsing
pre-downloaded Yahoo Finance CSV files.

Classes:
    YahooFinanceCSVData: Parses Yahoo Finance format CSV files.

Example:
    >>> data = bt.feeds.YahooFinanceCSVData(dataname='yahoo.csv')
    >>> cerebro.adddata(data)
"""

import collections
import io
import itertools
from datetime import date, datetime

from .. import feed
from ..dataseries import TimeFrame
from ..utils import date2num


class YahooFinanceCSVData(feed.CSVDataBase):
    """Parses pre-downloaded Yahoo CSV Data Feeds (or locally generated if
    they comply to the Yahoo format).

    Specific parameters:

      - ``dataname``
        The filename to parse or a file-like object

      - ``reverse`` (default: ``False``)
        It is assumed that locally stored files have the newest lines at
        the bottom.  If this is not the case, pass ``reverse=True``

      - ``adjclose`` (default: ``True``)
        Whether to use the dividend/split adjusted close and adjust all
        values according to it.

      - ``adjvolume`` (default: ``True``)
        Do also adjust ``volume`` if ``adjclose`` is also ``True``

      - ``round`` (default: ``True``)
        Whether to round the values to a specific number of decimals after
        having adjusted the close

      - ``roundvolume`` (default: ``0``)
        Round the resulting volume to the given number of decimals after
        having adjusted it

      - ``decimals`` (default: ``2``)
        Number of decimals to round to

      - ``swapcloses`` (default: ``False``)
        [2018-11-16] It would seem that the order of *close* and *adjusted
        close* is now fixed.  The parameter is retained, in case the need
        to swap the columns again arose.
    """

    # Extra line to carry Yahoo's dividend/split adjusted close
    lines = ("adjclose",)

    params = (
        ("reverse", False),
        ("adjclose", True),
        ("adjvolume", True),
        ("round", True),
        ("decimals", 2),
        # was ``False``; ``0`` matches the documented default (same value,
        # since bool is an int subclass, but honest about the meaning)
        ("roundvolume", 0),
        ("swapcloses", False),
    )

    def start(self):
        """Start the feed, reversing the file contents when ``reverse``
        is set so the parser always sees chronological order.
        """
        super().start()

        if not self.params.reverse:
            return

        # Yahoo used to deliver newest-first files.  Rebuild the stream in
        # chronological order; appendleft on a deque is O(1) per line.
        dq = collections.deque()
        for line in self.f:
            dq.appendleft(line)

        # Write the reversed lines into an in-memory buffer, rewind it and
        # swap it in as the file object the CSV parser will consume
        f = io.StringIO(newline=None)
        f.writelines(dq)
        f.seek(0)
        self.f.close()
        self.f = f

    def _loadline(self, linetokens):
        """Parse one CSV row into the data lines.

        Returns ``True`` when a bar was loaded, ``False`` when the feed is
        exhausted.
        """
        # Yahoo emits "null" tokens for non-trading days.  Skip such rows
        # by fetching the next line until a clean one is found or the file
        # runs out.
        while True:
            nullseen = False
            for tok in linetokens[1:]:
                if tok == "null":
                    nullseen = True
                    linetokens = self._getnextline()  # refetch tokens
                    if not linetokens:
                        return False  # cannot fetch, go away
                    break  # out of for, re-check via the while loop
            if not nullseen:
                break  # can proceed

        # Token cursor: each next(i) yields the next column index
        i = itertools.count(0)

        # Date column in ISO format: YYYY-MM-DD
        dttxt = linetokens[next(i)]
        dt = date(int(dttxt[0:4]), int(dttxt[5:7]), int(dttxt[8:10]))
        dtnum = date2num(datetime.combine(dt, self.p.sessionend))
        self.lines.datetime[0] = dtnum

        o = float(linetokens[next(i)])
        h = float(linetokens[next(i)])
        low = float(linetokens[next(i)])
        c = float(linetokens[next(i)])
        self.lines.openinterest[0] = 0.0

        # 2018-11-16 ... Adjusted Close seems to always be delivered after
        # the close and before the volume columns
        adjustedclose = float(linetokens[next(i)])

        # Volume may be missing or unparseable ("null"); default to 0.0.
        # Narrowed from a bare Exception catch that also printed to stdout.
        try:
            v = float(linetokens[next(i)])
        except (IndexError, ValueError):
            v = 0.0

        if self.p.swapcloses:  # swap closing prices if requested
            c, adjustedclose = adjustedclose, c

        # Ratio between raw and adjusted close, used to undo/propagate the
        # adjustment below
        adjfactor = c / adjustedclose

        # in v7 "adjusted prices" seem to be given, scale back for non adj
        if self.params.adjclose:
            o /= adjfactor
            h /= adjfactor
            low /= adjfactor
            c = adjustedclose
            # If the price goes down, volume must go up and viceversa
            if self.p.adjvolume:
                v *= adjfactor

        if self.p.round:
            decimals = self.p.decimals
            o = round(o, decimals)
            h = round(h, decimals)
            low = round(low, decimals)
            c = round(c, decimals)

        v = round(v, self.p.roundvolume)

        self.lines.open[0] = o
        self.lines.high[0] = h
        self.lines.low[0] = low
        self.lines.close[0] = c
        self.lines.volume[0] = v
        self.lines.adjclose[0] = adjustedclose

        return True
class YahooLegacyCSV(YahooFinanceCSVData):
    """Loads files downloaded before Yahoo discontinued its original
    service in May-2017.
    """

    # The empty ``version`` token selects the legacy file layout
    params = (("version", ""),)
class YahooFinanceCSV(feed.CSVFeedBase):
    """Feed wrapper exposing :class:`YahooFinanceCSVData` through the
    standard CSV feed interface.
    """

    DataCls = YahooFinanceCSVData
# TODO: verify this class still works against the current Yahoo service;
# add explanatory comments once confirmed.
class YahooFinanceData(YahooFinanceCSVData):
    """Executes a direct download of data from Yahoo servers for the given
    time range.

    Specific parameters (or specific meaning):

      - ``dataname``
        The ticker to download ('YHOO' for Yahoo own stock quotes)

      - ``proxies``
        A dict indicating which proxy to go through for the download as in
        {'http': 'http://myproxy.com'} or {'http': 'http://127.0.0.1:8080'}

      - ``period``
        The timeframe to download data in. Pass 'w' for weekly and 'm' for
        monthly.

      - ``reverse``
        [2018-11-16] The latest incarnation of Yahoo online downloads
        returns the data in the proper order. The default value of
        ``reverse`` for the online download is therefore set to ``False``

      - ``adjclose``
        Whether to use the dividend/split adjusted close and adjust all
        values according to it.

      - ``urlhist``
        The url of the historical quotes in Yahoo Finance used to gather a
        ``crumb`` authorization cookie for the download

      - ``urldown``
        The url of the actual download server

      - ``retries``
        Number of times (each) to try to get a ``crumb`` cookie and
        download the data
    """

    params = (
        ("proxies", {}),
        ("period", "d"),
        ("reverse", False),
        ("urlhist", "https://finance.yahoo.com/quote/{}/history"),
        ("urldown", "https://query1.finance.yahoo.com/v7/finance/download"),
        ("retries", 3),
    )

    def __init__(self):
        """Initialize error tracking for the download."""
        # Last download error description, or None when all went well
        self.error = None

    def start_v7(self):
        """Start a download using Yahoo's v7 API.

        First scrapes a ``crumb`` authorization token from the history
        page (retrying up to ``retries`` times), then downloads the CSV
        data.  On failure ``self.error`` describes the problem and
        ``self.f`` is set to ``None``.
        """
        try:
            import requests
        except ImportError:
            msg = (
                "The new Yahoo data feed requires to have the requests "
                "module installed. Please use pip install requests or "
                "the method of your choice"
            )
            raise Exception(msg)

        self.error = None
        url = self.p.urlhist.format(self.p.dataname)

        sesskwargs = dict()
        if self.p.proxies:
            sesskwargs["proxies"] = self.p.proxies

        crumb = None
        sess = requests.Session()
        # Use ``_`` for the retry counter: the original reused ``i`` both
        # as loop variable and as the text-search index, shadowing it
        for _ in range(self.p.retries + 1):  # at least once
            resp = sess.get(url, **sesskwargs)
            if resp.status_code != requests.codes.ok:
                continue

            txt = resp.text
            i = txt.find("CrumbStore")
            if i == -1:
                continue
            i = txt.find("crumb", i)
            if i == -1:
                continue
            istart = txt.find('"', i + len("crumb") + 1)
            if istart == -1:
                continue
            istart += 1
            iend = txt.find('"', istart)
            if iend == -1:
                continue

            crumb = txt[istart:iend]
            # The crumb may contain \uXXXX escapes; decode them
            crumb = crumb.encode("ascii").decode("unicode-escape")
            break

        if crumb is None:
            self.error = "Crumb not found"
            self.f = None
            return

        from ..utils.py3 import urlquote

        crumb = urlquote(crumb)

        # urldown/ticker?period1=posix1&period2=posix2&interval=1d&events=history&crumb=crumb
        urld = f"{self.p.urldown}/{self.p.dataname}"

        urlargs = []
        posix = date(1970, 1, 1)
        if self.p.todate is not None:
            period2 = (self.p.todate.date() - posix).total_seconds()
            urlargs.append(f"period2={int(period2)}")

        # BUGFIX: the original guarded this on ``todate`` instead of
        # ``fromdate``, so setting only ``todate`` crashed on
        # ``None.date()`` and setting only ``fromdate`` dropped the bound
        if self.p.fromdate is not None:
            period1 = (self.p.fromdate.date() - posix).total_seconds()
            urlargs.append(f"period1={int(period1)}")

        intervals = {
            TimeFrame.Days: "1d",
            TimeFrame.Weeks: "1wk",
            TimeFrame.Months: "1mo",
        }

        urlargs.append(f"interval={intervals[self.p.timeframe]}")
        urlargs.append("events=history")
        urlargs.append(f"crumb={crumb}")

        urld = "{}?{}".format(urld, "&".join(urlargs))
        f = None
        for _ in range(self.p.retries + 1):  # at least once
            resp = sess.get(urld, **sesskwargs)
            if resp.status_code != requests.codes.ok:
                continue

            ctype = resp.headers["Content-Type"]
            # Cover as many text types as possible for Yahoo changes
            if not ctype.startswith("text/"):
                self.error = "Wrong content type: %s" % ctype
                continue  # HTML returned? wrong url?

            # buffer everything from the socket into a local buffer
            try:
                # r.encoding = 'UTF-8'
                f = io.StringIO(resp.text, newline=None)
            except Exception:
                continue  # try again if possible (was also print()-ing)

            break

        self.f = f

    def start(self):
        """Download the data and hand over to the CSV parser."""
        self.start_v7()

        # Prepared a "path" file - CSV Parser can take over
        super().start()
class YahooFinance(feed.CSVFeedBase):
    """Feed wrapper exposing :class:`YahooFinanceData` through the
    standard CSV feed interface.
    """

    DataCls = YahooFinanceData

    # Re-export the data class' parameters as this feed's own
    params = DataCls.params._gettuple()