主要任务:抓取杭州气象台网站上的天气预报,并张贴在BBS上某个指定板面,同时提供详尽的日志功能,供统计分析。
Python代码(2010-6-23更新中文折行部分)
# Updated line-wrapping section (2010-6-23), Python 2.
#
# Expects these names to be in scope already:
#   line    - unicode text to wrap
#   pos     - index in `line` to resume from
#   limit   - characters per line; a line may occupy up to limit*2 GBK bytes
#   content - wrapped output accumulated so far (each line gets a '\n')
#
# Each output line is grown one character at a time until it fills
# limit*2 GBK bytes.  The break is then moved one character to the left
# when the line would end with an opening bracket/quote, or when the next
# line would begin with closing punctuation.
#
# NOTE(review): both punctuation sets are copied as they appear in the
# published listing.  The duplicated ASCII marks in `closing` suggest that
# full-width variants were folded to ASCII when the listing was extracted;
# confirm against the original script before relying on them.
opening = u'〔〈《【(“‘'
closing = u'?!:;、,。”’〕〉》)】:;.,?'
width = limit * 2
while pos < len(line):
    end = pos + limit
    size = len(line[pos:end].encode('gbk'))
    # Single-byte characters leave room on the line: extend it until the
    # byte width is reached or the text runs out.
    while size < width and end < len(line):
        end += 1
        size = len(line[pos:end].encode('gbk'))
    # The last character added may have been double-byte and overshot.
    if size > width:
        end -= 1
    if line[pos:end][-1:] in opening:
        # Do not strand an opening bracket/quote at the end of a line.
        end -= 1
    elif end < len(line) and line[end] in closing:
        # Do not start the next line with closing punctuation.
        end -= 1
    # Always consume at least one character so the loop cannot stall when
    # `limit` is very small.
    if end <= pos:
        end = pos + 1
    content += line[pos:end] + '\n'
    pos = end
Python代码(2009-8-13更新)
- # -*- coding:gb2312 -*-
- # python 2.5
- # auther: moqi88 # gmail.com
- # date: 2009-8-13
- # you can get ASCII control code in http://www.cs.tut.fi/~jkorpela/chars/c0.html
- import re
- import time
- import urllib
- import telnetlib
- myfile = open(‘orange_weather.txt’, ‘r’)
- myfile.read()
- myfile.seek(0)
- oldweather = myfile.read()
- myfile.close()
- logfilename = time.strftime(‘orange_log%Y%m%d.txt’, time.localtime())
- logfile = open(logfilename, ‘a’)
- isfinish = False
- print time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 第 1 次尝试’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 第 1 次尝试’
- page = urllib.urlopen("http://www.hzqx.com/gzhfw/dqyb.asp")
- u = unicode(str(page.headers), ‘gb2312’)
- data = page.read()
- match = re.search(ur" (.*)96121", data)
- if match == None:
- newweather = "今日天气暂无更新。更新更快的天气信息,请拨"
- else:
- newweather = match.group(1)
- newweather += "96121。"
- if (oldweather == newweather):
- print ‘信息源未更新’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 信息源未更新’
- else:
- f = file(‘orange_weather.txt’, ‘w’)
- f.write(newweather)
- f.close()
- title = time.strftime(‘%Y年%m月%d日%H时’, time.localtime())
- content = newweather
- content = re.sub(‘:’, ‘:’, content)
- content = re.sub(‘ ‘, ”, content)
- pos = 0
- limit = 36
- line = content.decode(‘gbk’)
- content = ”
- while len(line) – pos > 0:
- fix = 0
- part = line[pos:pos+limit]
- part2 = part.encode(‘gbk’)
- while ((len(part2)<=(limit-2)*2) and (len(part)>=limit)):
- part = line[pos:pos+limit+fix]
- part2 = part.encode(‘gbk’)
- fix += 1
- else:
- if (len(part)*2==len(part2)):
- part = part[:-1]
- content += part + ‘\n’
- pos += limit + fix – 1
- user = ‘username’
- passw = ‘password’
- board = ‘weather’
- title = time.strftime(‘%Y年%m月%d日%H时’, time.localtime())
- server = telnetlib.Telnet(‘orangecity.3322.org’)
- time.sleep(5)
- server.write(user + ‘\n’)
- time.sleep(1)
- server.write(passw + ‘\n’)
- #data = server.read_very_eager()
- #data = re.sub("\x1B\[[0-9;]{0,}[mHJK]{1,1}", ”, data)
- #print data
- time.sleep(0.5)
- server.write(‘N\ntttttttN\n\nts’ + board + ‘\n’)
- time.sleep(0.5)
- server.write(‘\x10’ + title + ‘\n\n’)
- time.sleep(2)
- server.write(‘[‘+time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime())+’更新]\n’)
- time.sleep(2)
- server.write(content.encode(‘gbk’))
- time.sleep(2)
- server.write(‘\n\x17\n’)
- time.sleep(2)
- server.write(‘!\n’)
- server.close()
- print title
- print content.encode(‘gbk’)
- print ‘任务成功’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 任务成功’
- isfinish = True
- if (isfinish == False):
- print ‘任务失败’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 任务失败’
- logfile.close()
Python代码(2009-03-20版)
- # -*- coding:gb2312 -*-
- # python 2.6
- # auther: moqi88 # gmail.com
- # date: 2009-3-20
- # you can get ASCII control code in http://www.cs.tut.fi/~jkorpela/chars/c0.html
- import re
- import time
- import urllib
- import telnetlib
- server = telnetlib.Telnet(‘orangecity.3322.org’)
- user = ‘username’
- passw = ‘password’
- myfile = open(‘weather.txt’, ‘r’)
- myfile.read()
- myfile.seek(0)
- oldweather = myfile.read()
- myfile.close()
- logfilename = time.strftime(‘log%Y%m%d.txt’, time.localtime())
- logfile = open(logfilename, ‘a’)
- isfinish = False
- for i in range(1, 16):
- print ‘time’ + str(i)
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 第 ‘ + str(i) + ‘ 次尝试’
- page = urllib.urlopen("http://www.hzqx.com/gzhfw/dqyb.asp")
- u = unicode(str(page.headers), ‘gb2312’)
- data = page.read()
- match = re.search(ur" (.*)96121", data)
- if match == None:
- newweather = "今日天气暂无更新。更新更快的天气信息,请拨"
- else:
- newweather = match.group(1)
- newweather += "96121。"
- if (oldweather == newweather):
- print ‘信息源未更新’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 信息源未更新’
- else:
- title = time.strftime(‘%Y年%m月%d日’, time.localtime())
- match = re.search(‘杭州市气象台(\d*)时’, newweather)
- if match == None:
- print ‘读取时间发布时间失败’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 读取时间发布时间失败’
- title += time.strftime(‘%H时’, time.localtime())
- else:
- title += match.group(1)
- title += ‘时’
- content = newweather
- content = re.sub(‘:’, ‘:’, content)
- content = re.sub(‘ ‘, ”, content)
- board = ‘moqi’
- time.sleep(5)
- server.write(user + ‘\n’ + passw + ‘\n’)
- data = server.read_until(user, 5)
- #data = re.sub("\x1B\[[0-9;]{0,}[mHJK]{1,1}", ”, data)
- server.write(‘Y\ntttttttN\n\nts’ + board + ‘\n\x10’ + title + ‘\n\n’ + content + ‘\x17\n’)
- time.sleep(2)
- server.write(‘!\n’)
- server.close()
- print title
- print content
- print ‘任务成功’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 任务成功’
- f = file(‘weather.txt’, ‘w’)
- f.write(newweather)
- f.close()
- isfinish = True
- break;
- time.sleep(60)
- if (isfinish == False):
- print ‘任务失败’
- print >> logfile, time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime()) + ‘ 任务失败’
- logfile.close()