foreach遍历
// Build a small in-memory list and turn it into a JavaRDD via sc.parallelize.
List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);
JavaRDD<Integer> rdd = sc.parallelize(data);

// Action applied to every element: print it with a "numbers;" prefix.
VoidFunction<Integer> printElement = new VoidFunction<Integer>() {
    @Override
    public void call(Integer value) throws Exception {
        System.out.println("numbers;" + value);
    }
};
rdd.foreach(printElement);
// Prints "numbers;" followed by the element, once for each of 1, 2, 3, 4, 5.
Lambda表达式写法 rdd.foreach(x -> System.out.println(x));
List转JavaRDD
// Start from an ordinary java.util.List of integers.
List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);
// Pass the list to sc.parallelize to obtain a JavaRDD with the same element type
// (sc is presumably a JavaSparkContext created elsewhere -- confirm).
JavaRDD<Integer> rdd = sc.parallelize(data);
JavaRDD 转JavaPairRDD
List<Integer> data = Arrays.asList(1, 2, 3, 4, 5);
JavaRDD<Integer> rdd = sc.parallelize(data);

// rdd.mapToPair takes a PairFunction<T, K, V>: the first type parameter is the
// input element type, the other two are the key and value types of the
// Tuple2 that call() returns.
PairFunction<Integer, Integer, Integer> toSquarePair = new PairFunction<Integer, Integer, Integer>() {
    @Override
    public Tuple2<Integer, Integer> call(Integer value) throws Exception {
        // Key is the number itself, value is its square.
        int squared = value * value;
        return new Tuple2<>(value, squared);
    }
};
JavaPairRDD<Integer, Integer> firstRDD = rdd.mapToPair(toSquarePair);
// firstRDD holds the pairs (1,1), (2,4), (3,9), (4,16), (5,25).
Lambda表达式写法 rdd.mapToPair(x -> new Tuple2<>(x, x * x));
JavaPairRDD 遍历
// collect() returns the RDD's elements as a collection of Tuple2 (a List, not an
// array); _1() reads the first element of a pair and _2() the second.
firstRDD.collect().forEach(pair -> {
    System.out.println(pair._1());
    System.out.println(pair._2());
});
//输出(每行一个值):
//1
//1
//2
//4
//3
//9
//4
//16
//5
//25
Lambda表达式写法 firstRDD.foreach(x -> System.out.println(x._2));
JavaPairRDD 转JavaRDD
// map takes a Function<T, R>: the first type parameter is the input type, the
// second is the output type. Each (key, (value1, value2)) element of joinRDD is
// rendered as the string "<key,<value1,value2>>".
Function<Tuple2<Integer, Tuple2<Integer, String>>, String> formatEntry =
        new Function<Tuple2<Integer, Tuple2<Integer, String>>, String>() {
            @Override
            public String call(Tuple2<Integer, Tuple2<Integer, String>> entry) throws Exception {
                int key = entry._1();
                Tuple2<Integer, String> values = entry._2();
                int first = values._1();
                String second = values._2();
                return new StringBuilder()
                        .append("<").append(key)
                        .append(",<").append(first)
                        .append(",").append(second)
                        .append(">>")
                        .toString();
            }
        };
JavaRDD<String> res = joinRDD.map(formatEntry);
// firstRDD holds (Integer, Integer) pairs, so each element x is a Tuple2 and
// cannot be passed to Integer.parseInt. Build the String key from the pair's
// first element and compute the square from that same element.
JavaPairRDD<String, Integer> secondRDD =
        firstRDD.mapToPair(x -> new Tuple2<>(String.valueOf(x._1()), x._1() * x._1()));
// Keep only the String key of each pair, turning the pair RDD back into a JavaRDD.
JavaRDD<String> thirdRDD = secondRDD.map(x -> x._1());