业界动态
毕业设计 大数据抖音短视频数据分析与可视化
2024-11-10 18:21

毕设帮助,选题指导,技术解答,欢迎打扰,见B站个人主页

毕业设计 大数据抖音短视频数据分析与可视化

https://space.bilibili.com/33886978

# Load the raw dataset and preview the first rows.
df = pd.read_csv('data.csv')
df.head()

# Column dtypes and non-null counts before any cleaning.
df.info()

# Row counts with and without exact duplicate rows.
print('去重前:', df.shape[0], '行数据')
print('去重后:', df.drop_duplicates().shape[0], '行数据')

# Missing values per column. The DataFrame method is used instead of
# np.sum(df), which reduces with axis=None (deprecated in recent pandas and
# slated to return a single scalar instead of per-column counts).
print(df.isnull().sum())

# Normalise column dtypes: timestamps become datetime64, identifiers become str.
for ts_col in ('date', 'real_time'):
    df[ts_col] = df[ts_col].astype('datetime64[ns]')

for id_col in ('uid', 'item_id', 'author_id'):
    df[id_col] = df[id_col].astype('str')

# These columns additionally lose their last two characters after the string
# conversion (presumably a trailing ".0" left by float parsing — TODO confirm).
for trimmed_col in ('user_city', 'item_city', 'music_id'):
    as_text = df[trimmed_col].astype('str')
    df[trimmed_col] = as_text.map(lambda text: text[:-2])

# Re-check dtypes after conversion.
df.info()

# One row per distinct (uid, user_city) pair, so each user counts once per city.
# NOTE(review): `user_info` was never defined in the visible script; this
# definition mirrors how `author_info` is built for creators — confirm it
# matches the intended user table.
user_info = df.drop_duplicates(['uid', 'user_city'])[['uid', 'user_city']]

# Users per city, most populous city first.
user_city_count = (
    user_info.groupby(['user_city'])
    .count()
    .sort_values(by=['uid'], ascending=False)
)
x1 = list(user_city_count.index)
y1 = user_city_count['uid'].tolist()
len(y1)

# Bar chart of users per city; the slider initially shows the top 5% of cities.
chart = Bar()
chart.add_xaxis(x1)
chart.add_yaxis(
    '地区使用人数',
    y1,
    color='#F6325A',
    itemstyle_opts={'barBorderRadius': [60, 60, 20, 20]},
    label_opts=opts.LabelOpts(position='top'),
)
chart.set_global_opts(
    datazoom_opts=opts.DataZoomOpts(
        range_start=0,
        range_end=5,
        orient='horizontal',
        type_='slider',
        is_zoom_lock=False,
        pos_left='1%',
    ),
    visualmap_opts=opts.VisualMapOpts(
        is_show=False, type_='opacity', range_opacity=[0.2, 1]
    ),
    title_opts=opts.TitleOpts(title="不同地区用户数量分布图", pos_left='40%'),
    legend_opts=opts.LegendOpts(pos_right='10%', pos_top='2%'),
)
chart.render_notebook()

# Views per value of the 'H' column, expressed in units of 10,000.
# NOTE(review): 'H' is assumed to already exist in df (presumably hour of day);
# it is not created anywhere in the visible script — confirm.
views_by_h = df.groupby(['H'])['uid'].count()
h_num = (views_by_h / 10000).round(1).to_list()
h = list(views_by_h.index)

# Area-filled line chart of view counts across 'H'.
chart = Line()
chart.add_xaxis(h)
chart.add_yaxis(
    '观看数/(万)',
    h_num,
    areastyle_opts=opts.AreaStyleOpts(color='#1AF5EF', opacity=0.3),
    itemstyle_opts=opts.ItemStyleOpts(color='black'),
    label_opts=opts.LabelOpts(font_size=12),
)
chart.set_global_opts(
    legend_opts=opts.LegendOpts(pos_right='10%', pos_top='2%'),
    # Title fixed: the original read "不时间…" (missing "同"); it now matches the
    # "不同…分布图" wording used by the other chart titles.
    title_opts=opts.TitleOpts(title="不同时间观看数量分布图", pos_left='40%'),
)
chart.render_notebook()

# Per-'H' totals of finished/liked plays and the per-'H' play count.
# Columns are selected before aggregating so unrelated columns are not summed
# (string columns would be concatenated, and datetime columns cannot be summed
# on recent pandas versions).
by_h = df.groupby(['H'])
left = by_h[['finish', 'like']].sum()
right = by_h['uid'].count()
per = pd.concat([left, right], axis=1)

# Completion and like rates as percentages of plays in each group.
per['finish_radio'] = round(per['finish'] * 100 / per['uid'], 2)
per['like_radio'] = round(per['like'] * 100 / per['uid'], 2)

x = list(per.index)
y1 = per['finish_radio'].to_list()
y2 = per['like_radio'].to_list()

# Base chart: completion rate on the primary y axis.
chart1 = Line()
chart1.add_xaxis(x)
chart1.add_yaxis(
    '完播率/%',
    y1,
    is_smooth=True,
    label_opts=opts.LabelOpts(is_show=False),
    is_symbol_show=False,
    linestyle_opts=opts.LineStyleOpts(
        color='#F6325A', opacity=.7, curve=0, width=2, type_='solid'
    ),
)
chart1.set_global_opts(yaxis_opts=opts.AxisOpts(min_=25, max_=45))
# Secondary y axis for the like rate, which lives on a much smaller scale.
chart1.extend_axis(yaxis=opts.AxisOpts(min_=0.4, max_=3))

# Overlay chart: like rate bound to the secondary axis.
chart2 = Line()
chart2.add_xaxis(x)
chart2.add_yaxis(
    '点赞率/%',
    y2,
    yaxis_index=1,
    is_smooth=True,
    label_opts=opts.LabelOpts(is_show=False),
    is_symbol_show=False,
    linestyle_opts=opts.LineStyleOpts(
        color='#1AF5EF', opacity=.7, curve=0, width=2, type_='solid'
    ),
)
chart1.overlap(chart2)

chart1.set_global_opts(
    legend_opts=opts.LegendOpts(pos_right='10%', pos_top='2%'),
    title_opts=opts.TitleOpts(title="点赞/完播率分布图", pos_left='40%'),
)
chart1.render_notebook()

# Day of week for every play (0 = Monday ... 6 = Sunday, per pandas dt.weekday).
df['weekday'] = df['date'].dt.weekday

# Plays per weekday. Reindexing to 0..6 keeps each count aligned with its label
# even if some weekday has no rows; indexing the raw list by position would
# mislabel the slices or raise IndexError in that case.
weekday_names = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
week = (
    df.groupby(['weekday'])['uid']
    .count()
    .reindex(range(7), fill_value=0)
    .to_list()
)
df_pair = [[label, plays] for label, plays in zip(weekday_names, week)]

# Rose-type donut chart coloured by play count.
chart = Pie()
chart.add(
    '',
    df_pair,
    radius=['40%', '70%'],
    rosetype='radius',
    center=['45%', '50%'],
    label_opts=opts.LabelOpts(is_show=True, formatter='{b}:{c}次'),
)
chart.set_global_opts(
    visualmap_opts=[
        opts.VisualMapOpts(
            min_=200000,
            max_=300000,
            type_='color',
            range_color=['#1AF5EF', '#F6325A', '#000000'],
            is_show=True,
            pos_top='65%',
        )
    ],
    legend_opts=opts.LegendOpts(pos_right='10%', pos_top='2%', orient='vertical'),
    title_opts=opts.TitleOpts(title="一周内播放分布图", pos_left='35%'),
)
chart.render_notebook()

# Number of plays (non-null uid rows) per channel value.
df.groupby(['channel'])['uid'].count()

# One row per distinct (author_id, item_city) pair.
author_info = df[['author_id', 'item_city']].drop_duplicates()
author_info.info()

# Creators per city, largest first.
creators_per_city = author_info.groupby(['item_city']).count()
author_city_count = creators_per_city.sort_values(by=['author_id'], ascending=False)
x1 = list(author_city_count.index)
y1 = author_city_count['author_id'].tolist()

# Total number of distinct creators in the dataset.
len(df.drop_duplicates(['author_id']))

# Bar chart of creators per city; the slider initially shows the top 5%.
zoom = opts.DataZoomOpts(
    range_start=0,
    range_end=5,
    orient='horizontal',
    type_='slider',
    is_zoom_lock=False,
    pos_left='1%',
)
fade = opts.VisualMapOpts(is_show=False, type_='opacity', range_opacity=[0.2, 1])

chart = Bar()
chart.add_xaxis(x1)
chart.add_yaxis(
    '地区创作者人数',
    y1,
    color='#F6325A',
    itemstyle_opts={'barBorderRadius': [60, 60, 20, 20]},
)
chart.set_global_opts(
    datazoom_opts=zoom,
    visualmap_opts=fade,
    legend_opts=opts.LegendOpts(pos_right='10%', pos_top='2%'),
    title_opts=opts.TitleOpts(title="不同城市创作者分布图", pos_left='40%'),
)
chart.render_notebook()

# One row per distinct video, then the number of videos for each duration value.
time = df[['item_id', 'duration_time']].drop_duplicates(['item_id'])
time = time.groupby(['duration_time']).count()
x1 = list(time.index)
y1 = time['item_id'].tolist()

# Bar chart of video counts by duration; the slider initially shows the first half.
zoom = opts.DataZoomOpts(
    range_start=0, range_end=50, orient='horizontal', type_='slider'
)
fade = opts.VisualMapOpts(
    max_=100000, min_=200, is_show=False, type_='opacity', range_opacity=[0.4, 1]
)

chart = Bar()
chart.add_xaxis(x1)
chart.add_yaxis(
    '视频时长对应视频数',
    y1,
    color='#1AF5EF',
    itemstyle_opts={'barBorderRadius': [60, 60, 20, 20]},
    label_opts=opts.LabelOpts(font_size=12, color='black'),
)
chart.set_global_opts(
    datazoom_opts=zoom,
    visualmap_opts=fade,
    legend_opts=opts.LegendOpts(pos_right='10%', pos_top='2%'),
    title_opts=opts.TitleOpts(title="不同时长作品分布图", pos_left='40%'),
)
chart.render_notebook()

# Overall like and completion rates as percentages of all rows.
like_per = 100 * df['like'].sum() / len(df['like'])
finish_per = 100 * df['finish'].sum() / len(df['finish'])

# Gauge with two pointers; the dial is split into three coloured bands.
dial_bands = [(0.3, "#1AF5EF"), (0.7, "#F6325A"), (1, "#000000")]
pointers = [("视频互动率", like_per), ('完播率', finish_per)]

gauge = Gauge()
gauge.add(
    "",
    pointers,
    detail_label_opts=opts.LabelOpts(is_show=False, font_size=18),
    axisline_opts=opts.AxisLineOpts(
        linestyle_opts=opts.LineStyleOpts(color=dial_bands, width=20)
    ),
)
gauge.render_notebook()

# Spearman rank correlation between a hand-picked subset of columns.
df_cor = df[['finish', 'like', 'duration_time', 'H']]
cor_table = df_cor.corr(method='spearman')
cor_array = np.array(cor_table)
cor_name = list(cor_table.columns)

# Heatmap cells as [first index, second index, correlation]; the first index
# runs 3..0 and the second 0..3, matching the 4x4 correlation matrix.
value = []
for i in (3, 2, 1, 0):
    for j in (0, 1, 2, 3):
        value.append([i, j, cor_array[i, j]])

heat = HeatMap()
heat.add_xaxis(cor_name)
heat.add_yaxis(
    "",
    cor_name,
    value,
    label_opts=opts.LabelOpts(is_show=True, position="inside"),
)
heat.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(
        is_show=False,
        max_=0.08,
        range_color=["#1AF5EF", "#F6325A", "#000000"],
    )
)
heat.render_notebook()

# Daily unique viewers (uv) and daily play count (pv).
# The original also built an unused `temp` list of the entire date column;
# it was never read, so it has been dropped.
puv = df.groupby(['date']).agg({'uid': 'nunique', 'item_id': 'count'})
uv = puv['uid'].to_list()
pv = puv['item_id'].to_list()
time = puv.index.to_list()

# Both series share one axis and the same smooth, marker-less line style.
chart1 = Line()
chart1.add_xaxis(time)
for series_name, series_data, series_color in (
    ('uv', uv, '#1AF5EF'),
    ('pv', pv, '#F6325A'),
):
    chart1.add_yaxis(
        series_name,
        series_data,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=False),
        is_symbol_show=False,
        linestyle_opts=opts.LineStyleOpts(
            color=series_color, opacity=.7, curve=0, width=2, type_='solid'
        ),
    )
chart1.render_notebook()

def _retention_rates(uid_sets, lag):
    """Return, for each day, the % of its users seen again `lag` entries later.

    `uid_sets` is a list of per-day user-id sets in chronological order. The
    lag is counted in list positions, exactly as in the original loops.
    """
    return [
        round(100 * len(earlier & later) / len(earlier), 2)
        for earlier, later in zip(uid_sets, uid_sets[lag:])
    ]


# Build each day's set of active users once. The original re-filtered the whole
# frame twice per day inside two copy-pasted loops.
uid_by_day = df.groupby(['date'])['uid'].apply(set)
days = uid_by_day.index.to_list()
daily_uids = uid_by_day.to_list()

# NOTE(review): offsets are positions in the list of observed dates, so "7 days
# later" is only a true calendar offset when the data has no missing dates.
lc = _retention_rates(daily_uids, 7)    # 7-entry retention, in percent
lc1 = _retention_rates(daily_uids, 1)   # next-entry retention (computed, not plotted)

# Only days that have a counterpart 7 entries later can be plotted.
x7 = days[0:-7]

chart1 = Line()
chart1.add_xaxis(x7)
chart1.add_yaxis(
    '七日留存率/%',
    lc,
    is_smooth=True,
    label_opts=opts.LabelOpts(is_show=False),
    is_symbol_show=False,
    linestyle_opts=opts.LineStyleOpts(
        color='#F6325A', opacity=.7, curve=0, width=2, type_='solid'
    ),
)
chart1.set_global_opts(
    legend_opts=opts.LegendOpts(pos_right='10%', pos_top='2%'),
    title_opts=opts.TitleOpts(title="用户留存率分布图", pos_left='40%'),
)
chart1.render_notebook()

# Per-user features: number of plays, like rate and completion rate.
df1 = df.groupby(['uid']).agg({'item_id': 'count', 'like': 'sum', 'finish': 'sum'})
df1['like_per'] = df1['like'] / df1['item_id']
df1['finish_per'] = df1['finish'] / df1['item_id']
# NOTE(review): features are clustered unscaled, so the play count (unbounded)
# will outweigh the two rates (each within 0..1) in the distance — confirm
# this is intended.
ndf1 = np.array(df1[['item_id', 'like_per', 'finish_per']])

# Elbow method: within-cluster sum of squares (inertia) for k = 1..7.
# random_state is pinned so the curve is reproducible and consistent with the
# seeded fits used later when evaluating the chosen k.
k_values = list(range(1, 8))
kmeans_per_k = [KMeans(n_clusters=k, random_state=0).fit(ndf1) for k in k_values]
inertias = [model.inertia_ for model in kmeans_per_k]

chart = Line(init_opts=opts.InitOpts(width='560px', height='300px'))
# The x axis of a Line chart is a category axis; labels are passed as strings
# so each k is matched by name rather than interpreted as a positional index.
chart.add_xaxis([str(k) for k in k_values])
chart.add_yaxis(
    "",
    inertias,
    label_opts=opts.LabelOpts(is_show=False),
    linestyle_opts=opts.LineStyleOpts(
        color='#F6325A', opacity=.7, curve=0, width=3, type_='solid'
    ),
)
chart.render_notebook()

from sklearn.metrics import silhouette_samples, silhouette_score

# Compare candidate cluster counts by mean silhouette score. The original
# repeated the same fit/score code (and imports) once per k and discarded the
# score outside a notebook cell echo; here the scores are kept and printed.
# k = 3 is evaluated last so that `n_cluster`, `cluster` and `y_pre` end up
# describing the 3-cluster model, as in the original.
silhouette_by_k = {}
for n_cluster in (4, 3):
    cluster = KMeans(n_clusters=n_cluster, random_state=0).fit(ndf1)
    y_pre = cluster.labels_  # cluster label assigned to each user
    silhouette_by_k[n_cluster] = silhouette_score(ndf1, y_pre)
    print(n_cluster, silhouette_by_k[n_cluster])

# Hard-coded profile of each of the three clusters, in the order of the
# x-axis categories below.
c_ = [
    [87.998, 9.1615, 39.92],
    [13.292, 12.077, 50.012],
    [275.011, 8.125, 28.751],
]

bar = Bar(init_opts=opts.InitOpts(theme='macarons', width='1000px', height='400px'))
# Category (x-axis) labels.
bar.add_xaxis(['播放数', '点赞率(千分之)', '完播率(百分之)'])
# One series per cluster, each in its own stack group.
for cluster_idx, profile in enumerate(c_):
    bar.add_yaxis(
        str(cluster_idx),
        [round(metric, 2) for metric in profile],
        stack='stack' + str(cluster_idx),
    )
bar.render_notebook()

毕设帮助,选题指导,技术解答,欢迎打扰,见B站个人主页

    以上就是本篇文章【毕业设计 大数据抖音短视频数据分析与可视化】的全部内容了,欢迎阅览 ! 文章地址:http://sjzytwl.xhstdz.com/news/5215.html 
     资讯      企业新闻      行情      企业黄页      同类资讯      首页      网站地图      返回首页 物流园资讯移动站 http://mip.xhstdz.com/ , 查看更多   
最新新闻
S-HUB 微伴助手应用连接器对接能力
微伴助手是一款安全的企业微信第三方应用,已经为电商、教育、金融、保险、医疗等机构提供技术支持,是武汉夜莺科技有限公司旗下
中国三线城市大学“十佳”,江苏大学第四,第一名是985
高考填志愿,众多的考生们,都想去一线城市读大学。在大家的固有印象中,重点总是和一线城市绑定在一起的。实际上,也有众多的中
各类娱乐场所有哪些类型?
引言 在现代社会中,娱乐场所成为了人们休闲、娱乐的重要去处。无论是工作压力的释放,还是与朋友聚会的选择,各类娱乐设施都在其中扮演着不可或缺的角色。从传统的电影院到新兴的VR体验馆,每种场所都拥有自己独特的魅力和吸引力。本文将
运动健身常识揭秘:新手必知的训练技巧与营养要点
引言:运动健身的重要性 在现代社会中,越来越多的人开始重视运动健身的重要性。无论是为了塑造美好的体型,还是为了保持健康的生活方式,运动健身都已成为人们生活中不可或缺的一部分。特别是对于新手而言,了解运动健身的常识性知识及正
如何快速找到Nginx配置文件路径?必看技巧揭秘!
引言:Nginx的重要性 在当今互联网时代,Nginx以其高效、轻量和灵活的特性在众多网络服务器中脱颖而出。无论是处理高并发的请求,还是作为反向代理服务器,Nginx已经成为许多企业和开发者的首选。随着Nginx的广泛使用,了解其配置文件的位
国家中医养生师资格证如何获得?
国家中医养生师资格证的价值 在日益追求健康生活方式的今天,中医养生受到越来越多人的关注。作为一种传统的医学体系,中医不仅关注疾病的治疗,更注重通过调整身体的整体状态来实现健康。获得国家中医养生师资格证,不仅能够提升个人的专
探索新机遇:2023旅游发展大会主题解读与未来趋势展望
引言:旅游发展大会的重要性 2023旅游发展大会不仅是行业内一场重要的盛会,更是推动全球旅游行业发展的关键时刻。随着后疫情时代的来临,旅游行业面临着巨大的挑战与机遇。各国政府、企业界及学术界的精英们汇聚一堂,共同探讨未来旅游发
网站优化建设方案模板分享
网站优化建设的重要性 在当今数字化时代,拥有一个良好的网站是企业成功的关键之一。随着互联网的发展,越来越多的消费者通过网络查找产品和服务,这使得网站的优化建设显得尤为重要。一个优化良好的网站不仅能够提升用户体验,还能显著提
红色旅游景点大揭秘
红色旅游:追寻历史足迹的旅程 红色旅游,作为一种独特的文化旅游形式,正受到越来越多游客的青睐。它不仅是对历史的追溯,更是对文化的传承和对初心的再认识。红色旅游景点遍布全国,让游客在游览的同时,能够深入了解中国共产党艰苦卓绝
如何设置服务器配置参数?配置要求一键搞定!
为什么需要设置服务器配置参数? 在搭建和管理服务器的过程中,设置服务器配置参数是非常重要的一环。合理的配置能够提高服务器的性能,加强安全性,并且提升用户体验。如果配置不当,不仅可能会导致服务器性能下降,还会增加安全风险,甚
本企业新闻