root / barchart / trunk / ovpyq.py

Revision 141, 10.1 kB (checked in by phil, 7 months ago)

lets go with psycopg2

Line 
1#!/usr/bin/python2
2##################################################
3# Name:        ovpyq - Python Quote Grabber
4# Author:      Philip J Cooper <Philip.Cooper@openvest.org>
5# orig Author:      Rimon Barr <barr@cs.cornell.edu>
6# Start date:  15 July 2003
7# Purpose:     Retrieve stock quote data in Python
8#              all data is cached in a postgresql database
9#              New data is retrieved from Yahoo
10##################################################
11
12import os, sys, re, traceback, getopt, urllib, string
13import psycopg2, mx.DateTime
14
15__version__ = "0.5.1"
16CACHE='dbname=testprices user=phil'
17DEBUG = 0
18TRUSTDB = 1
19
20db = psycopg2.connect(CACHE)
21
22def showVersion():
23  print 'ovPYQ v'+__version__+', by Phil Cooper:'
24  print 'Python Yahoo Quote fetching utility'
25
26def showUsage():
27  print
28  showVersion()
29  print
30  print 'Usage: ovpyq [-i] [start_date [end_date]] ticker [ticker...]'
31  print 
32  print '  -h, -?, --help    display this help information'
33  print '  -v, --version     display version'
34  print '  -i, --stdin       tickers fed on stdin, one per line'
35  print '  -d, --debug       show verbose output'
36  print
37  print '    date formats are yyyymmdd'
38  print '    if enddate is omitted, it is assume to be the same as startdate'
39  print '    if startdate is omitted, we use *current* stock tables'
40  print '      and otherwise, use historical stock tables.'
41  print '      (current stock tables will give previous close price before'
42  print '       market closing time.)'
43  print '    tickers are exactly what you would type at finance.yahoo.com'
44  print '    output format: "ticker, date (yyyymmdd), open, high, low, close, vol"'
45  print
46  print 'Send comments, suggestions and bug reports to <Philip.Cooper@openvest.org>.'
47  print
48
49def usageError():
50  print 'ovpyq: command syntax error'
51  print 'Try `ovpyq --help\' for more information.'
52
53def splitLines(buf):
54  lines=string.split(buf, '\n')
55  lines=filter(lambda x:x, lines)
56  def removeCarriage(s):
57    if s[-1]=='\r': return s[:-1]
58    else: return s
59  lines=[removeCarriage(l) for l in lines]
60  return lines
61
62class TradingDateChecker:
63  def __init__(self,years=5):
64    self.holidays = {}
65    yearNow  = mx.DateTime.now().year
66    md=((12,25),(1,1),(7,4))
67    for m,d in md:
68      for i in range(years):
69        h = mx.DateTime.DateTime(yearNow-i,m,d)
70        if h.day_of_week==6:
71          h=h+1
72        self.holidays[h]=1
73    self.holidays[mx.DateTime.DateTime(2006,02,20)]=1
74    self.holidays[mx.DateTime.DateTime(2005,02,21)]=1
75   
76  def isTradingDate(self,d1):
77    if d1.day_of_week > 4:
78      return False
79    if self.holidays.has_key(d1):
80      return False
81    return True 
82  def showHolidays(self):
83    for d in self.holidays.keys():
84      print d.Format("%m-%d-%y")
85
86   
87def allDates(d1,d2):
88  '''Return all dates in ascending order.'''
89  if int(d1)>int(d2):
90    raise 'd1 must be smaller than d2'
91  dates = []
92  while d1 < d2:
93    if tdc.isTradingDate(d1):
94      dates.append(d1)
95    d1 = d1 + 1
96  return dates
97
98def aggDates(dates):
99  '''Aggregate list of dates (yyyymmdd) in range pairs'''
100  if not dates: return []
101  aggs = []
102  dates.sort()
103  high=dates.pop(0)
104  low=high
105  for date in dates:
106    if date==high+1 : high=date
107    else:
108      aggs.append( (low, high) )
109      high=date; low=high
110  aggs.append( (low, high) )
111  aggs = [ (str(low),str(high)) for (low, high) in aggs]
112  return aggs
113
114def getYahooTickerHistory(d1, d2, ticker):
115  if type(d1)==type('string'):d1=mx.DateTime.DateFrom(d1)
116  if type(d2)==type('string'):d2=mx.DateTime.DateFrom(d2)
117  if DEBUG:
118    print 'Quering Yahoo!...... for %s (%s -> %s)' % (ticker, d1 , d2 )
119  url='http://table.finance.yahoo.com/table.csv'
120  query='a=%02d&b=%s&c=%s&d=%02d&e=%s&f=%s&s=%s&y=0&g=d&ignore=.csv'% \
121         (d1.month-1,d1.day,d1.year,d2.month-1,d2.day,d2.year,ticker)
122  url=url+'?'+query
123  if DEBUG:
124    print "url string: %s"%url
125  f=urllib.urlopen(url)
126  buf=f.read()
127  lines=splitLines(buf)
128  if re.match('no prices', lines[0], re.I): return
129  comment=re.compile("<!--.*-->")
130  while comment.match(lines[-1]):
131    lines=lines[:-1]
132  lines=lines[1:]
133  return lines
134
135def getCachedTicker(d1, d2, ticker, trustdb=TRUSTDB):
136  '''Get tickers, hopefully from cache.
137    d1, d2 = yyyymmdd starting and ending
138    ticker = symbol string
139    trustdb = integer for cachebehaviour
140       2 : trust cache do not attempt url update (fastest)
141       1 : trust cache but attempt to get newer or older records (default)
142       0 : attempt to get all missing dates and update name
143      -1 : purge current database and get entire history (slowest)'''
144  if type(d1)==type('string'):d1=mx.DateTime.DateFrom(d1)
145  if type(d2)==type('string'):d2=mx.DateTime.DateFrom(d2)
146  #this is to skip nontrading days (holidays and weekends) off the start date
147  while not tdc.isTradingDate(d1):
148    d1=d1+1
149  # if the end date is today and it's before 5pm just get yesterday's
150  if d2 == mx.DateTime.today() and mx.DateTime.now().hour < 17:
151    d2=d2-1
152  #this is to skip nontrading days (holidays and weekends) off the end date 
153  while not tdc.isTradingDate(d2):
154    d2=d2-1
155  if trustdb == -1:
156    print "delete this"
157    c1=db.cursor()
158    c1.execute("delete from dpri where ticker = '%s'" % ticker)
159    db.commit()
160  holes = []
161  data=getPostgresTickerHistory(d1,d2,ticker)
162  if len(data) == 0:
163    holes.append((d1,d2))
164  if trustdb == 1 and len(data) > 0:
165    if d1 < data[0][0]:
166      if DEBUG:
167        print "found hole 1 from %s < %s" % (d1,data[0][0])
168      holes.append((d1,data[0][0]-1))
169    if d2 > data[-1][0]:
170      if DEBUG:
171        print "found hole 2 from %s > %s" % (d2,data[-1][0])
172        print "***FIXME FIXME*** we really should check the %s prices for splits"%d2
173      holes.append((data[-1][0]+1,d2))
174  else:
175    if trustdb == 0:
176      dates=allDates(d1,d2)
177      # compute missing
178      cached = [d[0]  for d in data]
179      missing = [d for d in dates if d not in cached]
180      holes=aggDates(missing)
181     
182  updates = 0
183  if len(holes):
184    c1=db.cursor()
185    for td1, td2 in holes:
186      if DEBUG:
187        print "hole for %s to %s" %(td1,td2)
188      tmp = getYahooTickerHistory(td1, td2, ticker)
189      if len(tmp) and tmp[0].find('404 Not Found')==-1:
190         for t in tmp:
191           t=string.split(t,',')
192           if DEBUG:
193             print "insert into dpri values('%s','%s',%s,%s,%s,%s,%s)"%tuple([ticker]+[mx.DateTime.strptime(t[0],"%Y-%m-%d").strftime("%Y%m%d")]+t[1:6])
194           c1.execute("insert into dpri values('%s','%s',%s,%s,%s,%s,%s)"%tuple([ticker]+[mx.DateTime.strptime(t[0],"%Y-%m-%d").strftime("%Y%m%d")]+t[1:6]))
195           updates = updates +1
196    if updates:
197      db.commit()
198      data= getPostgresTickerHistory(d1, d2, ticker)
199  return data
200
201
202def getPostgresTickerHistory(d1, d2, ticker):
203  '''Get tickers, hopefully from cache.
204    d1, d2 = yyyymmdd starting and ending
205    ticker = symbol string'''
206  if type(d1)==type('string'):d1=mx.DateTime.DateFrom(d1)
207  if type(d2)==type('string'):d2=mx.DateTime.DateFrom(d2)
208  # get from cache
209  c1=db.cursor()
210  c1.execute("select dte,open,high,low,close,volume from dpri where ticker='%s' and dte>='%s' and dte<='%s' order by dte"%(ticker,d1.strftime("%Y-%m-%d"),d2.strftime("%Y-%m-%d")))
211  if DEBUG:
212    print "select dte,open,high,low,close,volume from dpri where ticker='%s' and dte>='%s' and dte<='%s' order by dte"%(ticker,d1.strftime("%Y-%m-%d"),d2.strftime("%Y-%m-%d"))
213  data=c1.fetchall()
214  # compute missing
215  return data
216
217def getTickers(d1, d2, tickers, trustdb=TRUSTDB):
218  '''Get tickers.
219    d1, d2 = yyyymmdd starting and ending
220    tickers = list of symbol strings
221    trustdb = trust (see getCachedTicker)'''
222  result = []
223  for t in tickers:
224    result = result + [(t,)+l for l in  getCachedTicker(d1, d2, t, trustdb)]
225  return result
226
227def getName(ticker):
228  c1=db.cursor()
229  c1.execute("select name from stockid where ticker='%s'"%ticker)
230  name=c1.fetchone()[0]
231  return name
232
233
234def getTickersNow(tickers):
235  if len(tickers)>100:
236    return getTickersNow(tickers[:100])+getTickersNow(tickers[100:])
237  url='http://finance.yahoo.com/d/quotes.csv';
238  tickers=string.join(tickers)
239  query={ 's':tickers, 'f':'sohgpv', 'e':'.csv' }
240  url=url+'?'+urllib.urlencode(query)
241  if DEBUG:
242    print "url string: %s"%url
243  f=urllib.urlopen(url)
244  buf=f.read()
245  lines=splitLines(buf)
246  result = []
247  def processLine(l):
248    l=string.split(l, ',')
249    l[0]=string.upper(l[0][1:-1])
250    l.insert(1, mx.DateTime.today().strftime("%Y%m%d"))
251    result.append(l)
252  for l in lines:
253    processLine(l)
254  return result
255
256def main():
257  TRUSTDB = 1
258  # parse options
259  try:
260    opts, args = getopt.getopt(sys.argv[1:], 'hv?it:d',
261      ['help', 'version', 'stdin', 'debug'])
262  except getopt.GetoptError: 
263    usageError()
264    return
265  # process options
266  stdin=0
267  for o, a in opts:
268    print "%s %s" % (o, a)
269    if o in ("-h", "--help", "-?"):
270      showUsage()
271      return
272    if o in ("-v", "--version"):
273      showVersion()
274      return 
275    if o in ("-d", "--debug"):
276      DEBUG = 1
277      print "DEBUG is now: %s" % (DEBUG)
278    if o in ("-t", "--trustdb"): 
279      print "%s %s" % (o, a)
280      TRUSTDB=int(a)
281
282  startdate=mx.DateTime.today()
283  enddate=startdate
284  today=1
285  tickers=[]
286  argpos=-1
287  for a in args:
288    argpos=argpos+1
289    if argpos==0 and string.digits.find(a[0])<>-1:
290        startdate=mx.DateTime.DateFrom(a)
291        today=0
292        continue
293    if argpos==1 and string.digits.find(a[0])<>-1:
294        enddate=mx.DateTime.DateFrom(a)
295        continue
296    tickers.append(a.upper())
297  if stdin:
298    tickers=tickers+splitLines(sys.stdin.read())
299  if not len(tickers):
300    showUsage()
301    return
302  if today:
303    result = getTickersNow(tickers)
304    for l in result:
305      print l
306  else:
307    result = getTickers(startdate, enddate, tickers, TRUSTDB)
308    for l in result:
309      print "%s,%s,%.3f,%.3f,%.3f,%.3f,%i"%((l[0],l[1].Format("%Y%m%d"))+l[2:7])
310
311
312try:
313  tdc=TradingDateChecker();
314  if __name__=='__main__': main()
315except KeyboardInterrupt:
316  traceback.print_exc()
317  print 'Break!'
Note: See TracBrowser for help on using the browser.