#!/usr/bin/python
# -*- coding: utf-8 -*-
############### COMP7404 Assignment 1 ##################
'''
This program builds a linear regression predictor to
forecast the price of houses. The algorithm consists
of four steps:
(1) Loading training data from a text file into numpy matrices
(2) Computing optimal parameter values (an illustrative normal-equation sketch follows the imports)
(3) Loading testing data from a text file into numpy matrices
(4) Predicting values for testing data and evaluating performance
My submission for this assignment is entirely my own original work done
exclusively for this assignment. I have not made and will not make my
solutions to assignment, homework, quizzes or exams available to anyone else.
These include both solutions created by me and any official solutions provided
by the course instructor or TA. I have not and will not engage in any
activities that dishonestly improve my results or dishonestly improve/hurt
the results of others.
### ACKNOWLEDGMENT (Type your full name and date here)
Your full name: CHEN Han
Date: 10 November 2018
###
'''
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
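# Step (2) of the outline above ("Computing optimal parameter values") does not appear
# in this part of the file. Purely as an illustration (the hypothetical name
# normalEquationSketch is not part of the assignment, and the graded solution may use
# gradient descent instead), a closed-form normal-equation fit could look like this:
def normalEquationSketch(X, y):
    # theta = (X^T X)^+ X^T y, using the pseudo-inverse for numerical robustness
    return np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)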
# This function is an exercise to load a file, create an array and implement arithmetic operations on arrays
def warmUp(warmUp_path):
    label = ['column1', 'column2', 'column3']
    warmUp_list = [[7.3, 2.4, 6.2], [5.6, 3.7, 4.8]]
    warmUp_data = pd.DataFrame(columns=label, data=warmUp_list)
    warmUp_data.to_csv('./warmUp.csv', index=False)
    # load the given warmUp.csv file in an array format
    # the expected result is [[ 7.3, 2.4, 6.2],[ 5.6, 3.7, 4.8]]
    array_A = pd.read_csv(warmUp_path, sep=',').values
    # print the label of warmUp.csv
    # the expected result is Index([u'column1', u'column2', u'column3'], dtype='object')
    label = pd.read_csv(warmUp_path, sep=',').columns
    ### START CODE HERE ###
    # create an array array_B [[ 3.5, 4.7, 5.5],[ 4.8, 6.2, 3.9]]
    array_B = np.array([[3.5, 4.7, 5.5], [4.8, 6.2, 3.9]])
    # calculate the transpose of array_B as transpose_B
    # the expected result is [[ 3.5, 4.8],[ 4.7, 6.2],[ 5.5, 3.9]]
    transpose_B = np.transpose(array_B)
    # calculate the sum of array_A and array_B as sum_AB
    # the expected result is [[ 10.8, 7.1, 11.7],[ 10.4, 9.9, 8.7]]
    sum_AB = np.add(array_A, array_B)
    # calculate the difference between array_A and array_B as diff_AB
    # the expected result is [[ 3.8, -2.3, 0.7],[ 0.8, -2.5, 0.9]]
    diff_AB = np.subtract(array_A, array_B)
    # calculate the elementwise product of array_A and array_B as ew_product_AB
    # the expected result is [[ 25.55, 11.28, 34.1],[ 26.88, 22.94, 18.72]]
    ew_product_AB = np.multiply(array_A, array_B)
    # calculate the array product of array_A and transpose_B as mat_product_AB
    # the expected result is [[ 70.93, 74.1],[ 63.39, 68.54]]
    mat_product_AB = np.dot(array_A, transpose_B)
    # calculate the true quotient (return the float division result) between A and B as divide_AB
    # the expected result is [[ 2.08571429, 0.5106383, 1.12727273],[ 1.16666667, 0.59677419, 1.23076923]]
    divide_AB = np.divide(array_A, array_B)
    # return the maximum of each row in array_B as max_row_B
    # the expected result is [ 5.5, 6.2]
    max_row_B = np.max(array_B, axis=1)
    # return the minimum of each column in array_B as min_column_B
    # the expected result is [ 3.5, 4.7, 3.9]
    min_column_B = np.min(array_B, axis=0)
    # calculate the mean value of each row in array_B as mean_row_B
    # the expected result is [ 4.56666667, 4.96666667]
    mean_row_B = np.mean(array_B, axis=1)
    ### END CODE HERE ###
    result = [{"array_A": array_A},
              {"label": label},
              {"array_B": array_B},
              {"transpose_B": transpose_B},
              {"sum_AB": sum_AB},
              {"diff_AB": diff_AB},
              {"ew_product_AB": ew_product_AB},
              {"mat_product_AB": mat_product_AB},
              {"divide_AB": divide_AB},
              {"max_row_B": max_row_B},
              {"min_column_B": min_column_B},
              {"mean_row_B": mean_row_B}]
    return result
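# A minimal usage sketch for warmUp (illustrative only; './warmUp.csv' is the file the
# function itself writes at the top, so it exists by the time it is read back):
#   for item in warmUp('./warmUp.csv'):
#       print(item)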
# This function is to load the training dataset and testing dataset (from the given .csv files)
# inputs (train_path, test_path) denote the paths of training dataset and testing dataset
# append a column of ones to the front of the datasets to denote the constant vector b
# outputs (X_train, y_train, X_test, y_test) denote the normalized training attribute value array, training label vector, testing attribute value array, testing label vector
def load_dataset(train_path, test_path):
    # train_data, test_data and label denote the original training data, testing data in array format and the column labels
    train_data = pd.read_csv(train_path, sep=',').values
    test_data = pd.read_csv(test_path, sep=',').values
    label = pd.read_csv(train_path, sep=',').columns
    # m_train and m_test denote the number of training data and testing data
    m_train = train_data.shape[0]
    m_test = test_data.shape[0]
    ### START CODE HERE ###
    # s_train_array, s_test_array denote the scaled train_data and test_data
    # both are mean-normalized with the training-set statistics: (x - mean) / (max - min)
    train_mean = np.sum(train_data, axis=0) / m_train
    train_range = np.max(train_data, axis=0) - np.min(train_data, axis=0)
    s_train_array = np.divide(train_data - train_mean, train_range)
    s_test_array = np.divide(test_data - train_mean, train_range)
    ### END CODE HERE ###
    # append a column of ones to the front of the datasets
    # train_append_ones and test_append_ones denote the ones vector appended to the datasets
    # train_boston_hp_data and test_boston_hp_data denote the modified training data and testing data after the appending operation
    train_append_ones = np.ones([m_train, 1])
    test_append_ones = np.ones([m_test, 1])
    train_boston_hp_data = np.column_stack((train_append_ones, s_train_array))
    test_boston_hp_data = np.column_stack((test_append_ones, s_test_array))
    # train_columns and test_columns denote the number of columns of the modified training data and testing data
    train_columns = train_boston_hp_data.shape[1]
    test_columns = test_boston_hp_data.shape[1]
    # the last column holds the label; everything before it (including the ones column) is X
    X_train = train_boston_hp_data[:, 0:train_columns - 1]
    y_train = train_boston_hp_data[:, train_columns - 1:]
    X_test = test_boston_hp_data[:, 0:test_columns - 1]
    y_test = test_boston_hp_data[:, test_columns - 1:]
    return X_train, y_train, X_test, y_test, label
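# A hedged end-to-end sketch of steps (1)-(2) ('train.csv' and 'test.csv' are placeholder
# file names, not the assignment's actual data files; normalEquationSketch is the
# illustrative helper defined after the imports above):
#   X_train, y_train, X_test, y_test, label = load_dataset('train.csv', 'test.csv')
#   theta = normalEquationSketch(X_train, y_train)
#   y_pred = X_test.dot(theta)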
# This function is to scale all values to [-1,1]
# inputs (train_array, test_array) denote the unscaled training array and testing array
# outputs (s_train_array, s_test_array) denote the scaled training array and testing array
def scaleFeature(train_array, test_array):
    ### START CODE HERE ###
    # m_train and m_test store the number of training and testing examples
    m_train = len(train_array)
    m_test = len(test_array)
    ### END CODE HERE ###
    # array_mean stores the means of the training data
    array_mean = np.reshape(np.mean(train_array, axis=0), (1, -1))
    # array_range is the difference between max and min values of the training data
    array_range = np.reshape(np.max(train_array, axis=0), (1, -1)) - np.reshape(np.min(train_array, axis=0), (1, -1))
    # both arrays are scaled with the training-set statistics
    s_train_array = np.true_divide(train_array - np.repeat(array_mean, m_train, axis=0), np.repeat(array_range, m_train, axis=0))
    s_test_array = np.true_divide(test_array - np.repeat(array_mean, m_test, axis=0), np.repeat(array_range, m_test, axis=0))
    return s_train_array, s_test_array
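# A worked micro-example for scaleFeature (values are illustrative only):
#   train = np.array([[1.0, 10.0], [3.0, 30.0]])   # column means [2, 20], ranges [2, 20]
#   test = np.array([[2.0, 25.0]])
#   scaleFeature(train, test)
#   # -> train scaled to [[-0.5, -0.5], [0.5, 0.5]], test scaled to [[0.0, 0.25]]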
# This function is to calculate the cost between predicted values and label values by use of MSE
# inputs (X, y, theta) denote the attribute value array, label vector, and parameter vector
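# The MSE cost described above is J(theta) = (1/(2m)) * sum((X.theta - y)^2) with the
# conventional 1/2 factor; a minimal, purely illustrative sketch (hypothetical name,
# not the graded computeCost implementation) could be:
def computeCostSketch(X, y, theta):
    m = X.shape[0]
    residual = X.dot(theta) - y  # predicted values minus label values
    return np.sum(residual ** 2) / (2.0 * m)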
