"""
# This file is part of famafrench
# Copyright (c) 2020, Christian Jauregui <chris.jauregui@berkeley.edu>
# See file LICENSE.txt for license information.
Filename
_________
`famafrench/wrdsconnect.py`
Description
___________
wrdsConnection
Object class used to set up a remote connection to wrds-cloud.
This class largely builds on the "Connection" class in the `WRDS-Py` library.
"""
__author__ = 'Christian Jauregui <chris.jauregui@berkeley.edu'
__all__ = ["wrdsConnection"]
# Standard Imports
import os
import sys
import pandas as pd
import sqlalchemy as sa
from sys import version_info
from wrds import __version__ as wrds_version
# Declare 'wrdsConnection' object class
[docs]class wrdsConnection:
__doc__ = """
Class for setting up the remote connection to `wrds-cloud`;
largely builds on the ``Connection`` class in the `WRDS-Py library <https://pypi.org/project/wrds/0.0.5/>`_.
Copyright (c) 2017 Wharton Research Data Services
"""
def __init__(self, autoconnect=True):
"""
Set up connection to WRDS database by providing necessary parameters,
including username and password. By default, also establish the connection to the database.
"""
py3 = version_info[0] > 2
if not py3:
input = raw_input # use raw_input in python 2
PermissionError = Exception
FileNotFoundError = Exception
appname = '{0} python {1}.{2}.{3}/wrds {4}'.format(
sys.platform,
version_info[0],
version_info[1],
version_info[2],
wrds_version)
# Sane defaults
self.WRDS_params = {'WRDS_USERNAME': os.getenv('WRDS_USERNAME'),
'WRDS_PASSWORD': os.getenv('WRDS_PASSWORD'),
'WRDS_POSTGRES_HOST': 'wrds-pgdata.wharton.upenn.edu',
'WRDS_POSTGRES_PORT': 9737,
'WRDS_POSTGRES_DB': 'wrds',
'WRDS_CONNECT_ARGS': {'sslmode': 'require',
'application_name': appname}}
pghost = 'postgresql://{usr}:{pwd}@{host}:{port}/{dbname}'
self.engine = sa.create_engine(pghost.format(usr=self.WRDS_params['WRDS_USERNAME'],
pwd=self.WRDS_params['WRDS_PASSWORD'],
host=self.WRDS_params['WRDS_POSTGRES_HOST'],
port=self.WRDS_params['WRDS_POSTGRES_PORT'],
dbname=self.WRDS_params['WRDS_POSTGRES_DB']),
connect_args=self.WRDS_params['WRDS_CONNECT_ARGS'])
if autoconnect:
self.connect()
def connect(self):
"""
Make a connection to the `wrds-cloud` database. Similar to the `WRDS-Py` package's method :meth:`wrds.sql.connect`.
Parameters
___________
None
Returns
_______
None
Class instance attributes are updates.
"""
try:
self.connection = self.engine.connect()
except Exception as e:
# Parameters for sa.create_engine(*args, **kwargs):
# https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine
pghost = 'postgresql://{usr}:{pwd}@{host}:{port}/{dbname}'
self.engine = sa.create_engine(pghost.format(usr=self.WRDS_params['WRDS_USERNAME'],
pwd=self.WRDS_params['WRDS_PASSWORD'],
host=self.WRDS_params['WRDS_POSTGRES_HOST'],
port=self.WRDS_params['WRDS_POSTGRES_PORT'],
dbname=self.WRDS_params['WRDS_POSTGRES_DB']),
connect_args=self.WRDS_params['WRDS_CONNECT_ARGS'])
try:
self.connection = self.engine.connect()
return self.connection
except Exception as e:
print("There was an error with your username and password.")
raise e
def close(self):
"""
Close the connection to the database. Similar to the `WRDS-Py` package's method :meth:`wrds.sql.close`.
Parameters
___________
None
Returns
_______
"""
self.connection.close()
self.engine.dispose()
return None
def raw_sql(self, sqlquery, coerce_float=True, date_cols=None, index_col=None, params=None):
"""
Query the `wrds-cloud` database using a raw SQL string.
Based on the `WRDS-Py` package's method :meth:`wrds.sql.raq_sql`
Parameters
___________
sqlquery : str
SQL code in string object.
coerce_float : bool, default True [optional]
Attempts to convert values to non-string, non-numeric objects
to floating point. Can result in loss of precision.
date_cols : list or dict, default None [optional]
- List of column names to parse as date
- dict of "{column_name: format string}" where format string is:
* :meth:`date.strftime` compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps
- dict of ``{column_name: arg dict}``, where the arg dict corresponds to the keyword arguments of :func:`pandas.to_datetime`
index_col : str, or list, str, default None [optional]
Column(s) to set as ``index(MultiIndex)``.
params : dict
Parameters to SQL query, if parameterized.
Returns
________
pd_sql : pandas.DataFrame
SQL query result.
"""
try:
pd_sql = pd.read_sql_query(sqlquery,
self.connection,
coerce_float=coerce_float,
parse_dates=date_cols,
index_col=index_col,
params=params)
return pd_sql
except sa.exc.ProgrammingError as e:
raise e
def get_wrds_table(self, library, table, obs=-1, offset=0, columns=None, coerce_float=None, index_col=None, date_cols=None):
"""
Create a :class:`pandas.DataFrame` from an entire table in the database.
Based on the `WRDS-Py` package's method :meth:`wrds.sql.get_table`
Parameters
___________
library : str
Postgres schema name
table : str
Postgress table name
obs : int, default -1, [optional]
Specifies the number of observations to pull from the table.
An integer less than 0 will return the entire table.
offset : int, default 0, [optional]
Specifies the starting point for the query.
An offset of 0 will start selecting from the beginning.
columns : list or tuple, default None, [optional]
Specifies the columns to be included in the output data frame.
coerce_float : bool, default True, [optional]
Attempt to convert values to non-string, non-numeric objects
to floating point. Can result in loss of precision.
date_cols : list or dict, default None, [optional]
- list of column names to parse as date
- dict of ``{column_name: format string}`` where format string is :meth:`date.strftime` compatible in case of
parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps
- dict of ``{column_name: arg dict}``, where the arg dict corresponds to the keyword arguments of
:func:1pandas.to_datetime`
index_col : str, or list, str, default None, [optional]
Column(s) to set as `index(MultiIndex)`
Returns
_______
None
"""
if obs < 0:
obsstmt = ''
else:
obsstmt = ' LIMIT {}'.format(obs)
if columns is None:
cols = '*'
else:
cols = ','.join(columns)
sqlstmt = ('SELECT {cols} FROM {schema}.{table} {obsstmt} OFFSET {offset};'.\
format(cols=cols,
schema=library,
table=table,
obsstmt=obsstmt,
offset=offset))
return self.raw_sql(sqlstmt, coerce_float=coerce_float, index_col=index_col, date_cols=date_cols)