# Source code for ml_investment.data_loaders.quandl_commodities

'''
Loader for commodities price information from 
https://blog.quandl.com/api-for-commodity-data.
Data may be downloaded by script
:func:`~ml_investment.download_scripts.download_commodities.main`

Expected dataset structure
        | commodities
        | ├── LBMA_GOLD.json
        | ├── CHRIS_CME_CL1.json
        | └── ...       
'''
 
import json
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from typing import Optional, Union, List
from ..utils import load_json, load_config



class QuandlCommoditiesData:
    '''
    Loader for commodities price information.

    Each commodity is expected to be stored as ``<code>.json`` inside
    the dataset folder, where ``/`` in the commodity code is replaced
    by ``_`` (i.e. ``LBMA/GOLD`` -> ``LBMA_GOLD.json``).
    '''
    def __init__(self, data_path: Optional[str]=None):
        '''
        Parameters
        ----------
        data_path:
            path to :mod:`~ml_investment.data_loaders.quandl_commodities`
            dataset folder. If None, then ``commodities_data_path``
            from `~/.ml_investment/config.json` will be used
        '''
        if data_path is None:
            data_path = load_config()['commodities_data_path']
        self.data_path = data_path

    def load(self, index: List[str]) -> Optional[pd.DataFrame]:
        '''
        Load time-series information about commodity price

        Parameters
        ----------
        index:
            list of commodities codes to load data for,
            i.e. ``['LBMA/GOLD', 'JOHNMATT/PALL']``

        Returns
        -------
        ``pd.DataFrame``
            time series price information with columns ``date``,
            ``price`` and ``commodity_code``. Codes without a
            corresponding file are skipped. ``None`` is returned
            if no file was found for any of the requested codes
        '''
        frames = []
        for code in index:
            file_name = '{}.json'.format(code.replace('/', '_'))
            path = os.path.join(self.data_path, file_name)
            if not os.path.exists(path):
                continue

            rows = np.array(load_json(path)['dataset']['data'])
            # First column of every row is the date, second is the price.
            # pd.to_datetime is used instead of ``astype(np.datetime64)``:
            # casting a pandas column to the unit-less ``datetime64`` dtype
            # is rejected by recent pandas versions.
            df = pd.DataFrame({
                'date': pd.to_datetime(rows[:, 0]),
                'price': rows[:, 1].astype('float'),
            })
            df['commodity_code'] = code
            frames.append(df)

        if not frames:
            return None

        return pd.concat(frames, axis=0)

    def existing_index(self) -> List[str]:
        '''
        Returns
        -------
        ``List``
            existing index values that can be passed to `load`.
            Only ``*.json`` files of the dataset folder are taken
            into account, values are file names without extension
        '''
        suffix = '.json'
        return [name[:-len(suffix)]
                for name in os.listdir(self.data_path)
                if name.endswith(suffix)]