from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from pydub.silence import detect_silence
from aip import AipSpeech
import os
import time
import re
import requests
import js2py
class baidu_Translate():
    """Unofficial client for the fanyi.baidu.com web translation endpoint.

    Scrapes the page for the anti-CSRF ``token`` and re-implements the
    site's JavaScript ``sign`` hash (via js2py) so POSTs to /v2transapi
    are accepted. Requires network access; may break whenever Baidu
    changes the page or the signing algorithm.
    """

    def __init__(self):
        # Sign-generation JavaScript lifted verbatim from fanyi.baidu.com.
        # js2py compiles it and the resulting hash(query, gtk) callable is
        # stored on self.js. Do NOT reformat this string: it is executed code.
        self.js = js2py.eval_js('''
var i = null;
function n(r, o) {
for (var t = 0; t < o.length - 2; t += 3) {
var a = o.charAt(t + 2);
a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a),
a = "+" === o.charAt(t + 1) ? r >>> a: r << a,
r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
}
return r
}
var hash = function e(r,gtk) {
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr( - 10, 10))
} else {
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)"" !== e[C] && f.push.apply(f, a(e[C].split(""))),
C !== h - 1 && f.push(o[C]);
var g = f.length;
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice( - 10).join(""))
}
var u = void 0,
u = null !== i ? i: (i = gtk || "") || "";
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A: (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)
}
for (
var p = m,F = "+-a^+6", D = "+-3^+b+-f", b = 0;
b < S.length; b++) p += S[b],p = n(p, F);
return p = n(p, D),
p ^= s,
0 > p && (p = (2147483647 & p) + 2147483648),
p %= 1e6,
p.toString() + "." + (p ^ m)
}
''')
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36', }
        self.session = requests.Session()
        # First GET only primes the session cookies; the page is fetched a
        # second time before the token regex is applied — on purpose.
        self.session.get('https://fanyi.baidu.com', headers=headers)
        response = self.session.get('https://fanyi.baidu.com', headers=headers)
        self.token = re.findall("token: '(.*?)',", response.text)[0]
        # gtk appears to be stable, so it is hard-coded instead of scraped
        # (the original scrape expression is kept for reference).
        self.gtk = '320305.131321201'  # re.findall("window.gtk = '(.*?)';", response.text, re.S)[0]

    def translate(self, query, from_lang='en', to_lang='zh'):
        """Translate ``query`` from ``from_lang`` to ``to_lang``.

        Returns the translated string, or None (implicit) when the API
        reports an error — presumably throttling/bad sign; TODO confirm
        the actual error codes against live responses.
        """
        # langdetect — mimics the browser's request sequence before translating
        self.session.post('https://fanyi.baidu.com/langdetect', data={'query': query})
        # clickEvent — telemetry ping, also part of the browser sequence
        self.session.get('https://click.fanyi.baidu.com/?src=1&locate=zh&action=query&type=1&page=1')
        # translate
        data = {
            'from': from_lang,
            'to': to_lang,
            'query': query,
            'transtype': 'realtime',
            'simple_means_flag': '3',
            'sign': self.js(query, self.gtk),  # per-query hash from the JS above
            'token': self.token
        }
        response = self.session.post('https://fanyi.baidu.com/v2transapi', data=data)
        json = response.json()
        if 'error' in json:
            pass  # falls through: caller receives None on API error
            # return 'error: {}'.format(json['error'])
        else:
            return response.json()['trans_result']['data'][0]['dst']
class baidu_SpeechRecognition():
    """Speech-to-text via the Baidu ASR SDK, with optional translation.

    Workflow: construct with a ``dev_pid`` language-model id, call
    ``load_audio()`` with a 16 kHz WAV, then ``speech_recognition()``
    once per time slice.
    """

    def __init__(self, dev_pid):
        """``dev_pid`` selects the Baidu ASR language model."""
        # SECURITY: credentials are hard-coded in source. Move them to
        # environment variables or a config file before sharing this code.
        Speech_APP_ID = '19712136'
        Speech_API_KEY = 'Loo4KbNtagchc2BLdCnHEnZl'
        Speech_SECRET_KEY = 'DO4UlSnw7FzpodU2G3yXQSHLv6Q2inN8'
        self.dev_pid = dev_pid
        self.SpeechClient = AipSpeech(Speech_APP_ID, Speech_API_KEY, Speech_SECRET_KEY)
        self.TranslClient = baidu_Translate()

    def load_audio(self, audio_file):
        """Load the WAV file whose slices will be recognised."""
        self.source = AudioSegment.from_wav(audio_file)

    def speech_recognition(self, offset, duration, fanyi):
        """Recognise the [offset, duration) second slice of the loaded audio.

        Returns ``[recognised_text, translation]``; the translation is ''
        unless ``fanyi`` is truthy. Best-effort: returns ['', ''] when the
        ASR response has no 'result' key (recognition error).
        """
        # pydub slices in milliseconds; raw_data is the PCM payload the SDK expects.
        data = self.source[offset * 1000:duration * 1000].raw_data
        result = self.SpeechClient.asr(data, 'wav', 16000, {'dev_pid': self.dev_pid, })
        fanyi_text = ''
        if fanyi:
            try:
                fanyi_text = self.TranslClient.translate(result['result'][0])
            except Exception:
                # Best-effort: missing 'result' key or a network/translate
                # failure leaves the translation empty rather than aborting.
                pass
        try:
            return [result['result'][0], fanyi_text]
        except Exception:
            # 'result' absent → ASR error; inspect `result` when debugging.
            return ['', '']
def cut_point(path, dbfs=1.25):
    """Compute subtitle cut points for a WAV file from its silent gaps.

    Silences quieter than ``dBFS * dbfs`` and at least 600 ms long mark
    segment ends. Segments between 0.5 s and 5 s are merged with the
    following one. Returns a list of [start, end] pairs in seconds.
    """
    audio = AudioSegment.from_file(path, format="wav")
    silences = detect_silence(audio, 600, audio.dBFS * dbfs, 1)

    # Turn millisecond silence stamps into [previous_end, this_end] spans (seconds).
    spans = []
    prev_end = 0
    for stamp in silences:
        spans.append([prev_end, stamp[1] / 1000])
        prev_end = stamp[1] / 1000

    min_len = 0.5
    max_len = 5
    merged = []
    skip = 0
    count = len(spans)
    for idx in range(count):
        pos = idx + skip
        if pos < count:
            start, end = spans[pos]
            # Mid-length segment with a successor: absorb the next span's end.
            if min_len < end - start < max_len and pos + 1 < count:
                skip += 1
                end = spans[pos + 1][1]
            merged.append([start, end])
        elif end - start > max_len:
            # Note: start/end here are carried over from the prior iteration.
            merged.append([start, end])
        else:
            break
    return merged
def cut_text(text, length=38):
    """Hard-wrap ``text`` into newline-separated chunks of ``length`` chars.

    A trailing remainder shorter than 4 characters is appended to the last
    chunk instead of becoming its own line (avoids orphan fragments on
    screen). Text no longer than ``length`` is returned unchanged.

    Fixes the original implementation, which discarded all previously
    accumulated lines (``newtext = cutA + cutB``) whenever the short-
    remainder case was hit after the first iteration.
    """
    if len(text) <= length:
        return text
    lines = []
    while len(text) > length:
        head, text = text[:length], text[length:]
        if len(text) < 4:
            # Merge the tiny remainder into this chunk, no extra line.
            lines.append(head + text)
            text = ''
        else:
            lines.append(head)
    if text:
        lines.append(text)
    return '\n'.join(lines)
def progressbar(total, temp, text='&&', lenght=40):
    """Print an in-place console progress bar (carriage-return overwrite).

    ``text`` is a template whose '&&' placeholder is replaced by the bar;
    ``lenght`` (typo kept for caller compatibility) is the bar width in
    characters. ``temp`` is the current progress out of ``total``.
    """
    template = '\r' + text.strip().replace('&&', '[{0}{1}]{2}%')
    percent = round(temp / total * 100, 2)
    filled = round(temp / total * lenght)
    hollow = lenght - filled
    print(template.format('■' * filled, '□' * hollow, percent), end='')
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Fixes the original milliseconds computation, which reused the decimal
    digits of ``str(round(seconds, 3))`` verbatim — e.g. 1.5 s became
    ``,005`` instead of ``,500``. Computing via total milliseconds also
    carries correctly when rounding reaches the next second.
    """
    total_ms = int(round(seconds * 1000))
    s, ms = divmod(total_ms, 1000)
    m, s = divmod(s, 60)
    h, m = divmod(m, 60)
    return "%02d:%02d:%02d,%03d" % (h, m, s, ms)
def separate_audio(file_path, save_path):
    """Extract the audio track of ``file_path`` as 16 kHz mono WAV.

    The WAV is written to ``save_path``/tmp.wav (path built portably
    instead of with a hard-coded Windows '\\\\' separator) and its path is
    returned. The clip is closed even if writing fails, so the underlying
    ffmpeg reader is not leaked.
    """
    audio_file = os.path.join(save_path, 'tmp.wav')
    audio = AudioFileClip(file_path)
    try:
        # -ar 16000 -ac 1: sample rate / channel count Baidu ASR expects.
        audio.write_audiofile(audio_file, ffmpeg_params=['-ar', '16000', '-ac', '1'], logger=None)
    finally:
        audio.close()
    return audio_file
def file_filter(path, alldir=False, exts=('mp4', 'mov')):
    """List video files under ``path`` as ``[directory, filename]`` pairs.

    ``alldir=True`` walks the tree recursively; otherwise only the top
    level is listed. ``exts`` generalizes the previously hard-coded
    extension list. Results are sorted by the first number found in the
    filename (files without digits sort first).

    Fixes in this version: extension matching uses a real suffix test
    instead of substring search (which matched e.g. 'movie.txt'); the
    sort key no longer crashes on filenames without digits and compares
    numerically ('2' before '10'); the non-recursive branch no longer
    probes ``os.path.isdir`` on the first *character* of each filename.
    """
    suffixes = tuple('.' + e.lower() for e in exts)
    find_list = []
    if alldir:
        for dirpath, _dirnames, filenames in os.walk(path):
            for fname in filenames:
                if fname.lower().endswith(suffixes):
                    find_list.append([dirpath, fname])
    else:
        for fname in os.listdir(path):
            if fname.lower().endswith(suffixes):
                find_list.append([path, fname])

    def _num_key(entry):
        # First digit run in the filename, as an int; -1 when absent.
        digits = re.findall(r'\d+', entry[1])
        return int(digits[0]) if digits else -1

    find_list.sort(key=_num_key)
    return find_list
def countTime(s_time, now=True):
if now: s_time = (time.time() - s_time)
m, s = divmod(int(s_tim
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
1.本项目基于语音识别API,结合了语音识别、视频转换音频识别以及语句停顿分割识别等多种技术,从而实现了高效的视频字幕生成。 2.项目运行环境:在Windows环境下完成Python 3所需的配置,并运行代码即可。 3.项目包括7个模块:数据预处理、翻译、格式转换、音频切割、语音识别、文本切割和main函数。基于百度语音API得到所需要的APP_ID、API_KEY、SECRET_KEY。进入百度语音官网地址为http://yuyin.baidu.com。将识别的英文结果使用爬虫调用百度翻译,得到对应的中文翻译。使用moviepy库完成从视频中提取音频工作。使用pydub库,利用停顿时的音频分贝降低作为判定断句标准,设置停顿时的分贝阈值。调用百度语音识别API进行操作,上传待识别音频,进行中文或英文识别。断句,避免同一画面内出现过多文字影响观感。 4.项目博客:https://blog.csdn.net/qq_31136513/article/details/132205049
资源推荐
资源详情
资源评论
收起资源包目录
智能语音识别和字幕推荐系统——深度学习算法应用(含全部工程源码)+测试数据集.zip (5个子文件)
智能语音识别和字幕推荐系统——深度学习算法应用(含全部工程源码)+测试数据集
.DS_Store 6KB
英文字幕.srt 4KB
[高清 720P] 【奥巴马最新演讲】《英文演讲》_Trim.mp4 15.57MB
谷健+任家旺.py 13KB
中英文字幕.srt 7KB
共 5 条
- 1
资源评论
小胡说人工智能
- 粉丝: 1w+
- 资源: 52
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功