没有合适的资源?快使用搜索试试~ 我知道了~
Spark GraphX基本操作.pdf
需积分: 5 1 下载量 49 浏览量
2023-07-04
11:50:36
上传
评论
收藏 36KB PDF 举报
温馨提示
试读
2页
Spark GraphX基本操作
资源推荐
资源详情
资源评论
Spark GraphX基本操作
import org.apache.spark.SparkContext
import org.apache.spark._
import org.apache.spark.graphx._
import org.apache.spark.graphx.Graph
import org.apache.spark.graphx.Edge
import org.apache.spark.graphx.VertexRDD
import org.apache.spark.graphx.util.GraphGenerators
import org.apache.spark.graphx.GraphLoader
import org.apache.spark.storage.StorageLevel
import org.apache.spark.rdd.RDD
/**
 * Walk-through of basic Spark GraphX operations on a small property graph
 * of users (vertices) and the relationships between them (edges).
 */
object SparkGraphx1 {

  /**
   * Builds the sample graph and prints the result of each demonstrated operation:
   * vertex/edge filtering, triplets, degrees, subgraph, and vertex mapping.
   *
   * @param args command-line arguments; not read by this program
   */
  def main(args: Array[String]): Unit = {
    // The master URL and application name are hard-coded in this example.
    val sc = new SparkContext("spark://centos.host1:7077", "Spark Graphx")
    try {
      // Vertex RDD: (vertex id, (user name, position))
      val users: RDD[(VertexId, (String, String))] = sc.parallelize(Array(
        (3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
        (5L, ("franklin", "prof")), (2L, ("istoica", "prof"))))

      // Edge RDD: Edge(source id, destination id, relationship label)
      val relationships: RDD[Edge[String]] = sc.parallelize(Array(
        Edge(3L, 7L, "collab"), Edge(5L, 3L, "advisor"),
        Edge(2L, 5L, "colleague"), Edge(5L, 7L, "pi")))

      // Default user, used for relationships that reference a user missing from `users`
      val defaultUser = ("John Doe", "Missing")

      // Build the graph
      val graph = Graph(users, relationships, defaultUser)

      // Filtering the vertex RDD and the edge RDD
      val fcount1 = graph.vertices.filter { case (_, (_, pos)) => pos == "postdoc" }.count
      println(s"postdocs users count: $fcount1")
      val fcount2 = graph.edges.filter(edge => edge.srcId > edge.dstId).count
      println(s"srcId > dstId edges count: $fcount2")
      // Same filter as above, written with a pattern match on Edge
      val fcount3 = graph.edges.filter { case Edge(src, dst, _) => src > dst }.count
      println(s"srcId > dstId edges count: $fcount3")

      // Triplets: source id, source attribute, destination id, destination attribute, edge attribute
      val triplets: RDD[String] = graph.triplets.map { triplet =>
        s"${triplet.srcId}-${triplet.srcAttr._1}-${triplet.attr}-${triplet.dstId}-${triplet.dstAttr._1}"
      }
      triplets.collect().foreach(println(_))

      // Degree, in-degree, out-degree
      val degrees: VertexRDD[Int] = graph.degrees
      degrees.collect().foreach(println)
      val inDegrees: VertexRDD[Int] = graph.inDegrees
      inDegrees.collect().foreach(println)
      val outDegrees: VertexRDD[Int] = graph.outDegrees
      outDegrees.collect().foreach(println)

      // Subgraph: keep only vertices whose position is not the default "Missing"
      val subGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
      subGraph.vertices.collect().foreach(println(_))
      subGraph.triplets
        .map(triplet => s"${triplet.srcAttr._1} is the ${triplet.attr} of ${triplet.dstAttr._1}")
        .collect().foreach(println(_))

      // Map operations: derive a new graph from the original without changing its structure.
      // The two forms below produce the same vertex attributes, but (per the original note)
      // the first one is not optimized by GraphX.
      val newVertices = graph.vertices.map { case (id, attr) => (id, (attr._1 + "-1", attr._2 + "-2")) }
      val newGraph1 = Graph(newVertices, graph.edges)
      // mapVertices' function returns only the NEW ATTRIBUTE. Returning (id, attr) here would
      // nest the id inside the attribute and make this graph differ from newGraph1.
      val newGraph2 = graph.mapVertices((id, attr) => (attr._1 + "-1", attr._2 + "-2"))

      // NOTE(review): the source listing is cut off at this point, right after announcing the
      // next step ("build a new graph whose vertex attribute is the out-degree"). That step is
      // not visible and is intentionally not reconstructed here.
    } finally {
      // Release the cluster resources held by the context even if a step above fails.
      sc.stop()
    }
  }
}
资源评论
大数据张老师
- 粉丝: 259
- 资源: 32
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功