#!/usr/bin/env python
from numpy import *
def pageRankGenerator(At = [array((), int64)],
numLinks = array((), int64),
ln = array((), int64),
alpha = 0.85,
convergence = 0.0001,
checkSteps = 10
):
"""
Compute an approximate page rank vector of N pages to within
some convergence factor.
@param At a sparse square matrix with N rows. At[ii] contains
the indices of pages jj linking to ii.
@param numLinks iNumLinks[ii] is the number of links going out
from ii.
@param ln contains the indices of pages without links
@param alpha a value between 0 and 1. Determines the relative
importance of "stochastic" links.
@param convergence a relative convergence criterion. Smaller
means better, but more expensive.
@param checkSteps check for convergence after so many steps
"""
# the number of "pages"
N = len(At)
# the number of "pages without links"
M = ln.shape[0]
# initialize: single-precision should be good enough
iNew = ones((N,), float64) / N
iOld = ones((N,), float64) / N
done = False
while not done:
# normalize every now and then for numerical stability
iNew /= sum(iNew)
for step in range(checkSteps):
# swap arrays
iOld, iNew = iNew, iOld
# an element in the 1 x I vector.
# all elements are identical.
oneIv = (1 - alpha) * sum(iOld) / N
# an element of the A x I vector.
# all elements are identical.
oneAv = 0.0
if M > 0:
oneAv = alpha * sum(iOld.take(ln, axis = 0)) / N
# the elements of the H x I multiplication
ii = 0
while ii < N:
page = At[ii]
h = 0
if page.shape[0]:
h = alpha * dot(
iOld.take(page, axis = 0),
1. / numLinks.take(page, axis = 0)
)
iNew[ii] = h + oneAv + oneIv
ii += 1
diff = sum(abs(iNew - iOld))
done = (diff < convergence)
yield iNew
def transposeLinkMatrix(
outGoingLinks = [[]]
):
"""
Transpose the link matrix. The link matrix contains the pages
each page points to. But what we want is to know which pages
point to a given page, while retaining information about how
many links each page contains (so store that in a separate
array), as well as which pages contain no links at all (leaf
nodes).
@param outGoingLinks outGoingLinks[ii] contains the indices of
pages pointed to by page ii
@return a tuple of (incomingLinks, numOutGoingLinks, leafNodes)
"""
nPages = len(outGoingLinks)
# incomingLinks[ii] will contain the indices jj of the pages
# linking to page ii
incomingLinks = [[] for ii in range(nPages)]
# the number of links in each page
numLinks = zeros(nPages, int64)
# the indices of the leaf nodes
leafNodes = []
for ii in range(nPages):
if len(outGoingLinks[ii]) == 0:
leafNodes.append(ii)
else:
numLinks[ii] = len(outGoingLinks[ii])
# transpose the link matrix
for jj in outGoingLinks[ii]:
incomingLinks[jj].append(ii)
incomingLinks = [array(ii) for ii in incomingLinks]
numLinks = array(numLinks)
leafNodes = array(leafNodes)
return incomingLinks, numLinks, leafNodes
def pageRank(
linkMatrix = [[]],
alpha = 0.85,
convergence = 0.0001,
checkSteps = 10
):
"""
Convenience wrap for the link matrix transpose and the generator.
@see pageRankGenerator for parameter description
"""
incomingLinks, numLinks, leafNodes = transposeLinkMatrix(linkMatrix)
for gr in pageRankGenerator(incomingLinks, numLinks, leafNodes,
alpha = alpha, convergence = convergence,
checkSteps = checkSteps):
final = gr
return final
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
基于Python实现的pagerank算法.zip (2个子文件)
基于Python实现的pagerank算法
pageRank.py 4KB
pagerank_test.py 540B
共 2 条
- 1
资源评论
__AtYou__
- 粉丝: 1942
- 资源: 667
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功