import re, urllib, urllib2
class GoogleHarvester:
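    # Matches the result links in the HTML Google returned at the time this
    # snippet was written, e.g. re_links.findall('<a class=l href="http://example.com/">x</a>')
    # gives ['http://example.com/']. If Google changes its markup, this pattern
    # (and the "Next" page test in harvest()) will need updating.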
    re_links = re.compile(r'<a class=l href="(.+?)"', re.IGNORECASE | re.DOTALL)

    def __init__(self):
        pass

    def harvest(self, terms):
        '''Searches Google for these terms and returns only the links (URLs).
        Input: terms (string) -- one or several words to search for.
        Output: a list of URLs (strings).
        Duplicate links are removed and the list is sorted.
        Example: print GoogleHarvester().harvest('monthy pythons')
        '''
print "Google: Searching for '%s'" % terms
links = {}
currentPage = 0
while True:
print "Google: Querying page %d (%d links found so far)" % (currentPage/100+1, len(links))
address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" % (urllib.quote_plus(terms),currentPage)
            request = urllib2.Request(address, None, {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'})
            urlfile = urllib2.urlopen(request)
            page = urlfile.read(200000)
            urlfile.close()
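            # Store each URL as a dictionary key so duplicates are discarded
            # automatically (the stored value is never used).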
            for url in GoogleHarvester.re_links.findall(page):
                links[url] = 0
if "</div>Next</a></table></div><center>" in page: # Is there a "Next" link for next page of results ?
currentPage += 100 # Yes, go to next page of results.
else:
break # No, break out of the while True loop.
print "Google: Found %d links." % len(links)
return sorted(links.keys())

# Example: search for "monthy pythons" and save the links to a file.
links = GoogleHarvester().harvest('monthy pythons')
with open("links.txt", "w") as f:
    f.write("\n".join(links))