<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<!-- Artifact selection stored in the workspace: the artifacts-to-build list holds a single
     entry, the artifact named "FoodSpark2:jar".
     NOTE(review): presumably this is the set of artifacts the IDE builds together with the
     project; confirm against the IDE's artifact settings. -->
<component name="ArtifactsWorkspaceSettings">
<artifacts-to-build>
<artifact name="FoodSpark2:jar" />
</artifacts-to-build>
</component>
<!-- Auto-import settings: the autoReloadType option is set to SELECTIVE.
     NOTE(review): the exact meaning of SELECTIVE is defined by the IDE, not by this file;
     confirm in the build-tool reload settings. -->
<component name="AutoImportSettings">
<option name="autoReloadType" value="SELECTIVE" />
</component>
<!-- Version-control changelists: one changelist named "Default Changelist", marked
     default="true", with an empty comment, followed by four changelist-related options.
     The changelist id is also referenced by the TaskManager component in this file. -->
<component name="ChangeListManager">
<list default="true" id="2f6761c6-62fb-48be-959b-884768030495" name="Default Changelist" comment="" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<!-- Flag component: the "done" option is true.
     NOTE(review): presumably records that code-style inference has already run for this
     project; confirm. -->
<component name="CodeStyleSettingsInfer">
<option name="done" value="true" />
</component>
<!-- Migration marker for Markdown settings: stateVersion is 1. -->
<component name="MarkdownSettingsMigration">
<option name="stateVersion" value="1" />
</component>
<!-- Migration marker for project code-style settings: version is 2. -->
<component name="ProjectCodeStyleSettingsMigration">
<option name="version" value="2" />
</component>
<!-- Opaque identifier string for this project, stored in the id attribute. -->
<component name="ProjectId" id="2hrnt4S2vdqV91WTfQqSRlO5Edh" />
<!-- Project view display toggles: hideEmptyMiddlePackages and showLibraryContents are
     both set to true. -->
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<!-- Key/value UI properties remembered for this project: the last opened file path
     (the $PROJECT_DIR$ macro), the last edited Project Structure section ("Artifacts"),
     two Project Structure proportions (0.15 and 0.2), and the last selected settings page
     ("preferences.lookFeel").
     NOTE(review): the proportions are presumably splitter positions; confirm. -->
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="project.structure.last.edited" value="Artifacts" />
<property name="project.structure.proportion" value="0.15" />
<property name="project.structure.side.proportion" value="0.2" />
<property name="settings.editor.selected.configurable" value="preferences.lookFeel" />
</component>
<!-- Recent-entries store: under the key CopyClassDialog.RECENTS_KEY there is one recent
     entry, "FoodSpark". -->
<component name="RecentsManager">
<key name="CopyClassDialog.RECENTS_KEY">
<recent name="FoodSpark" />
</key>
</component>
<!-- Run configurations. The selected configuration is "Application.FoodSpark".
     Two configurations are defined, both marked temporary="true" and both bound to
     module FoodSpark2:
       1. "FoodSpark" (type Application): main class FoodSpark.FoodSpark, with a coverage
          pattern "FoodSpark.*" that is enabled.
       2. "Scala REPL" (type ScalaScriptConsoleRunConfiguration): JVM option
          -Djline.terminal=NONE, empty console arguments, working directory $PROJECT_DIR$,
          parallel runs disallowed.
     Each configuration has a method block (v="2") with the "Make" step enabled.
     The recent_temporary list names both configurations. -->
<component name="RunManager" selected="Application.FoodSpark">
<configuration name="FoodSpark" type="Application" factoryName="Application" temporary="true">
<option name="MAIN_CLASS_NAME" value="FoodSpark.FoodSpark" />
<module name="FoodSpark2" />
<extension name="coverage">
<pattern>
<option name="PATTERN" value="FoodSpark.*" />
<option name="ENABLED" value="true" />
</pattern>
</extension>
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
<configuration name="Scala REPL" type="ScalaScriptConsoleRunConfiguration" factoryName="Scala Console" temporary="true" show_console_on_std_err="false" show_console_on_std_out="false">
<module name="FoodSpark2" />
<option name="allowRunningInParallel" value="false" />
<option name="javaOptions" value="-Djline.terminal=NONE" />
<option name="myConsoleArgs" value="" />
<option name="workingDirectory" value="$PROJECT_DIR$" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
<recent_temporary>
<list>
<item itemvalue="Application.FoodSpark" />
<item itemvalue="Scala REPL.Scala REPL" />
</list>
</recent_temporary>
</component>
<!-- Spell-checker settings: all dictionary and folder counters are 0, CorrectionsLimit is 5,
     and UseSingleDictionary is true.
     NOTE(review): DefaultDictionary holds a localized (Chinese) string, presumably the
     IDE's label for the application-level dictionary. It is data written by the IDE, so
     it must not be translated or normalized by hand. -->
<component name="SpellCheckerSettings" BundledDictionaries="0" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" Dictionaries="0" CorrectionsLimit="5" DefaultDictionary="应用程序级" UseSingleDictionary="true" />
<!-- Subversion settings: contains only an empty configuration element, so no values are
     stored here. -->
<component name="SvnConfiguration">
<configuration />
</component>
<!-- Task tracking: a single active task with id "Default" and summary "Default task".
     Its changelist entry carries the same id and name as the default changelist declared
     in the ChangeListManager component. The created and updated elements hold the same
     value, 1718361274808 (presumably epoch milliseconds; confirm). The servers element is
     empty, so no task servers are listed. -->
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="2f6761c6-62fb-48be-959b-884768030495" name="Default Changelist" comment="" />
<created>1718361274808</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1718361274808</updated>
</task>
<servers />
</component>
<!-- Remembered window and dialog geometry. Each state element stores a position (x, y)
     and/or a size (width, height) under a key, plus a timestamp.
     Every key appears twice:
       - once with a nested screen element describing a 1707 x 1019 screen at (0, 0);
       - once with the suffix "/0.0.1707.1019@0.0.1707.1019" and no child element,
         carrying the same geometry and timestamp.
     Keys recorded: FileChooserDialogImpl, the four GridCell.Tab.0.* cells (bottom, center,
     left, right; all 1663 x 274), SettingsEditor, DirectoryHistoryDialog and TipDialog.
     NOTE(review): timestamps are presumably epoch milliseconds; confirm. -->
<component name="WindowStateProjectService">
<state x="634" y="250" key="FileChooserDialogImpl" timestamp="1718531781760">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state x="634" y="250" key="FileChooserDialogImpl/0.0.1707.1019@0.0.1707.1019" timestamp="1718531781760" />
<state width="1663" height="274" key="GridCell.Tab.0.bottom" timestamp="1718678758041">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state width="1663" height="274" key="GridCell.Tab.0.bottom/0.0.1707.1019@0.0.1707.1019" timestamp="1718678758041" />
<state width="1663" height="274" key="GridCell.Tab.0.center" timestamp="1718678758041">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state width="1663" height="274" key="GridCell.Tab.0.center/0.0.1707.1019@0.0.1707.1019" timestamp="1718678758041" />
<state width="1663" height="274" key="GridCell.Tab.0.left" timestamp="1718678758040">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state width="1663" height="274" key="GridCell.Tab.0.left/0.0.1707.1019@0.0.1707.1019" timestamp="1718678758040" />
<state width="1663" height="274" key="GridCell.Tab.0.right" timestamp="1718678758041">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state width="1663" height="274" key="GridCell.Tab.0.right/0.0.1707.1019@0.0.1707.1019" timestamp="1718678758041" />
<state x="334" y="84" key="SettingsEditor" timestamp="1718678034338">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state x="334" y="84" key="SettingsEditor/0.0.1707.1019@0.0.1707.1019" timestamp="1718678034338" />
<state x="93" y="93" width="1521" height="833" key="com.intellij.history.integration.ui.views.DirectoryHistoryDialog" timestamp="1718531318720">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state x="93" y="93" width="1521" height="833" key="com.intellij.history.integration.ui.views.DirectoryHistoryDialog/0.0.1707.1019@0.0.1707.1019" timestamp="1718531318720" />
<state x="538" y="334" key="com.intellij.ide.util.TipDialog" timestamp="1718716314753">
<screen x="0" y="0" width="1707" height="1019" />
</state>
<state x="538" y="334" key="com.intellij.ide.util.TipDialog/0.0.1707.1019@0.0.1707.1019" timestamp="1718716314753" />
</component>
</project>
spark课程设计任务
需积分: 0 152 浏览量
更新于2024-07-31
收藏 21MB ZIP 举报
根据课程设计要求,在IDEA中编写处理数据文件data.csv的Spark程序并打包成jar包;同时将数据文件data.csv上传到HDFS中,再将jar包提交到Spark集群中运行。
data.csv里面包含餐厅数据,可以用记事本打开查看,主要包含以下13个字段,字段之间由逗号分隔:
"所属年月","商家名称","主营类型","店铺URL","特色菜","累计评论数","累计销售人次","店铺评分","本月销量","本月销售额","城市","商家地址","电话"
课程设计要求:在IDEA中创建Maven项目(pom.xml文件已提供给大家),并完成以下任务。
一、将data.csv文件上传到hdfs的/cateringdata/目录下。
二、编写Spark程序,实现以下功能,并且将程序打包为jar包
1.去掉"本月销量","本月销售额"(从0开始计数的第8列和第9列)存在异常的数据(数据为空字符串、null或者0),并且统计去掉了多少条;这一步清洗获得的数据要以逗号分隔,存到HDFS的/foodsparktask1目录下;
2.去掉"店铺评分"数据为null的数据,并统计去掉了多少条;
3.去掉"店
淮猪
- 粉丝: 131
- 资源: 7
最新资源
- 平面等离子体手性纳米材料结构-comsol模型
- 基于OpenCV的全景图像拼接生成器
- 基于等效燃油消耗最小的并联式混合动力能量管理策略控制策略(ECMS) ①(工况可自行添加); ②仿真图像包括 发动机转矩变化图像、电机转矩变化图像、电池SOC变化图像、车速变化图像; ③整车simil
- Sim-EKB-Install-2024-12-08
- 变频器原理及应用实验讲义-最终版.doc
- 力扣 732. 我的日程安排表 III
- 锂电池充电器用不对称半桥反激变器电路仿真 两个管子均可实现ZVS 模型包含开环和电压闭环控制 运行环境为matlab simulink
- Request的主要作用,操作.md
- 机nvh分析电磁仿真Maxwell电机电磁振动噪声NVH分析 包括Maxwell仿真基础 电磁力理论分析计算 Maxwell电磁力仿真计算 电磁力耦合到结构场谐响应分析等
- node-red-4.0.8.zip 2025最新
- 一种新的多变量干旱严重指数来识别短期水文信号:以亚马逊河流域为例研究
- 异构系统分组编队跟踪控制(无文献)
- 豆瓣电影数据集,可以用于电影数据可视化分析
- java-23-doc
- 豆瓣电影数据集,可以用于电影数据可视化分析
- 【本科毕业设计】-含甲胺基化合物的消毒副产物NDMA特性与机理研究-word论文