option的匹配
// Pattern matching on Option: Map#get returns Option[Int] — Some(v) when the
// key exists, None otherwise. Typing the result as Option[Int] (instead of the
// original Any) lets the compiler check the match for exhaustiveness; the
// Some/None cases already cover every Option value, so the original
// catch-all `case _` branch was unreachable and has been removed.
val map = Map(("zhangsan", 2000), ("lisi", 2500), ("wangwu", 3000))
val option: Option[Int] = map.get("zhangsan")
option match {
  case Some(v) => println(v)         // key present: print the salary
  case None    => println("nothing") // key absent
}
|
作业题:
// Homework: daily PV/UV per site computed from log.txt, whose lines look like
//   site1,user1,2018-03-02 02:25:25
// Malformed sample lines ("site4,user5," / "site3,,") are filtered out.
// Fix vs. original: `mapValues` returns a lazy view (deprecated since 2.13);
// the strict `.map { case ... }` form is used so each aggregate is a plain,
// fully-computed Map.
object Test2 {
  def main(args: Array[String]): Unit = {
    import scala.io.Source
    // Read lazily, line by line (analogous to a LineRecordReader in an MR mapper).
    val lines: Iterator[String] = Source.fromFile("log.txt").getLines()
    // Keep only well-formed records with exactly 3 fields.
    // NOTE(review): split(",") drops trailing empty strings, so "site3,,"
    // splits to length 1 and is rejected here — presumably intended
    // (split(",", -1) would keep the empty fields; see the original's hint).
    val filterData: Iterator[String] = lines.filter(_.split(",").length == 3)
    // (site, user, day) — day is the date part of the timestamp.
    val data1: Iterator[(String, String, String)] = filterData.map { line =>
      val fields = line.split(",")
      (fields(0), fields(1), fields(2).split(" ")(0))
    }
    // Group all records by (site, day).
    val data2: Map[(String, String), List[(String, String, String)]] =
      data1.toList.groupBy(t => (t._1, t._3))
    // PV = number of records per (site, day).
    val dayPV: Map[(String, String), Int] =
      data2.map { case (key, recs) => (key, recs.length) }
    dayPV.foreach(println)
    // UV = number of distinct users per (site, day).
    val dayUV: Map[(String, String), Int] =
      data2.map { case (key, recs) => (key, recs.map(_._2).distinct.length) }
    dayUV.foreach(println)
  }
}

// Hourly PV/UV per site: same pipeline as Test2, but records carry the hour
// as a fourth field and grouping is by (site, day, hour).
object Test3 {
  def main(args: Array[String]): Unit = {
    import scala.io.Source
    val lines: Iterator[String] = Source.fromFile("log.txt").getLines()
    val filterData: Iterator[String] = lines.filter(_.split(",").length == 3)
    // (site, user, day, hour)
    val data1: Iterator[(String, String, String, String)] = filterData.map { line =>
      val fields = line.split(",")
      val times = fields(2).split(" ")      // Array(day, "HH:mm:ss")
      val hour = times(1).split(":")(0)     // hour component
      (fields(0), fields(1), times(0), hour)
    }
    val data2: Map[(String, String, String), List[(String, String, String, String)]] =
      data1.toList.groupBy(t => (t._1, t._3, t._4))
    // PV per (site, day, hour).
    val hourPV: Map[(String, String, String), Int] =
      data2.map { case (key, recs) => (key, recs.length) }
    hourPV.foreach(println)
    println("**********************")
    // UV per (site, day, hour).
    val hourUV: Map[(String, String, String), Int] =
      data2.map { case (key, recs) => (key, recs.map(_._2).distinct.length) }
    hourUV.foreach(println)
  }
}
// For each user, find the movie genre with the highest average rating.
//   ratings.txt lines: "uid,mid,score"
//   movies.txt  lines: "mid,...,genre1|genre2|..." (genres are the last field)
// Fixes vs. original:
//   * a rating whose movie id is missing from movies.txt no longer throws
//     NoSuchElementException — it is skipped via mAndTypes.get,
//   * the per-user best genre uses maxBy instead of sortBy(-_._3) + apply(0)
//     (same result incl. tie-breaking — first maximum wins — but O(n)),
//   * the deprecated lazy mapValues view is replaced by a strict map.
object MovieTest {
  def main(args: Array[String]): Unit = {
    import scala.io.Source
    val ratingData = Source.fromFile("ratings.txt").getLines()
    val movieData = Source.fromFile("movies.txt").getLines()

    // (uid, mid, score)
    val ratingData1: Iterator[(String, String, String)] = ratingData.map { line =>
      val fields = line.split(",")
      (fields(0), fields(1), fields(2))
    }
    // (mid, genres)
    val movieData1: Iterator[(String, String)] = movieData.map { line =>
      val fields = line.split(",")
      (fields(0), fields(fields.length - 1))
    }
    // movie id -> pipe-separated genre string, e.g. "Action|Thriller|Crime"
    val mAndTypes: Map[String, String] = movieData1.toMap

    // Explode every rating into one (uid, score, genre) row per genre.
    val umScore: Iterator[(String, String, String)] = ratingData1.flatMap {
      case (uid, mid, score) =>
        mAndTypes.get(mid).toList.flatMap { genres =>
          genres.split("\\|").map(genre => (uid, score, genre))
        }
    }

    // Average score per (uid, genre).
    val groupData: Map[(String, String), List[(String, String, String)]] =
      umScore.toList.groupBy(t => (t._1, t._3))
    val utypeAvg: Map[(String, String), Double] = groupData.map { case (key, rows) =>
      (key, rows.map(_._2.toDouble).sum / rows.length)
    }

    // Flatten the key, group by user, and keep the genre with the top average.
    val list1: List[(String, String, Double)] =
      utypeAvg.toList.map { case ((uid, genre), avg) => (uid, genre, avg) }
    val groupList: Map[String, List[(String, String, Double)]] = list1.groupBy(_._1)
    val result: Map[String, (String, Double)] = groupList.map { case (uid, rows) =>
      val best = rows.maxBy(_._3) // groups produced by groupBy are never empty
      (uid, (best._2, best._3))
    }
    result.foreach(println)
  }
}
偏函数
专门是匹配的函数
scala> var arr = Array(1,2,3,4,5,6) arr: Array[Int] = Array(1, 2, 3, 4, 5, 6) scala> def pf:PartialFunction[Int,Int]={ | case x=>x*10 | } pf: PartialFunction[Int,Int]
scala> arr.map(pf) res1: Array[Int] = Array(10, 20, 30, 40, 50, 60)
scala> var arr = Array(("zhangsan",2000),("lisi",2500)) arr: Array[(String, Int)] = Array((zhangsan,2000), (lisi,2500))
scala> def pf:PartialFunction[(String,Int),(String,Int)]={ | case (x,y)=>(x,y+1000) | } pf: PartialFunction[(String, Int),(String, Int)]
scala> arr.map(pf) res2: Array[(String, Int)] = Array((zhangsan,3000), (lisi,3500)) |
定义偏函数
def methodName: PartialFunction[InType, OutType] = {
  case pattern => result
}
AKKA
akka是一个基于Actor模型的并发通信框架,其作用相当于hadoop中的RPC协议
akka就是spark1.6以前的通信协议,1.6以后使用的通信协议是netty
为什么需要akka:单线程处理能力差,不能解决并发问题;多线程虽然是多个线程一起工作,但线程间共享数据容易混乱(线程之间不会直接进行通信)。akka的Actor通过消息传递通信,避免了共享数据带来的混乱
h5
python mysql
hadoop