[visualization] networkx_example - graph data visualization

Build and modify a graph

import networkx as nx
graph = nx.Graph()
graph.add_node(1)
graph.add_nodes_from(range(2,4))
graph.add_nodes_from(['u','v'])
graph.nodes()
[1, 2, 3, 'u', 'v']
graph.add_edge(1,2)
graph.add_edge('u','v')
graph.add_edges_from([(1,3),(1,4),(1,5),(1,6)]) # duplicate edges are ignored automatically; endpoints not yet in the graph (4, 5, 6) are added as nodes
graph.add_edge('u','w')
graph.edges()
[(1, 2), (1, 3), (1, 4), (1, 5), (1, 6), ('u', 'v'), ('u', 'w')]
graph.nodes()
[1, 2, 3, 4, 'u', 6, 'w', 5, 'v']
graph.remove_node(2)
graph.nodes()
[1, 3, 4, 'u', 6, 'w', 5, 'v']
graph.edges()
[(1, 3), (1, 4), (1, 5), (1, 6), ('u', 'v'), ('u', 'w')]
graph.remove_nodes_from([4,5])
graph.nodes()
[1, 3, 'u', 6, 'w', 'v']
graph.edges()
[(1, 3), (1, 6), ('u', 'v'), ('u', 'w')]
graph.remove_edge(1,3)
graph.remove_edges_from([(1,2),('u','v')]) # edges that do not exist are silently ignored
graph.number_of_nodes()
6
graph.number_of_edges()
2

visualize graph

g = nx.karate_club_graph() # a dataset from its install package
import matplotlib.pyplot as plt
kw = {'with_labels':True, 'node_color':'lightblue', 'edge_color':'gray'}
%matplotlib inline
nx.draw(g,**kw)

g.degree() # key is node id , value is degree
{0: 16,
 1: 9,
 2: 10,
 3: 6,
 4: 3,
 5: 4,
 6: 4,
 7: 4,
 8: 5,
 9: 2,
 10: 3,
 11: 1,
 12: 2,
 13: 5,
 14: 2,
 15: 2,
 16: 2,
 17: 2,
 18: 2,
 19: 3,
 20: 2,
 21: 2,
 22: 2,
 23: 5,
 24: 3,
 25: 3,
 26: 2,
 27: 4,
 28: 3,
 29: 4,
 30: 4,
 31: 6,
 32: 12,
 33: 17}
g.degree(33)
17
g.number_of_nodes()
34
g.number_of_edges()
78

random graph ER model

from scipy.stats import bernoulli
bernoulli.rvs(p=0.1)
0
bernoulli.rvs(p=0.5)
0
bernoulli.rvs(p=0.5)
0
bernoulli.rvs(p=0.5)
1
import itertools
# Build an Erdos-Renyi (ER) random graph by hand: start from n_nodes isolated
# nodes and connect every unordered pair independently with probability p.
g = nx.Graph()
n_nodes = 20
p = 0.2
g.add_nodes_from(range(n_nodes))
# Iterate over range(n_nodes) rather than a literal 20 so that changing
# n_nodes above changes the candidate edges as well as the node set.
for node1, node2 in itertools.combinations(range(n_nodes), 2):
    # bernoulli.rvs returns 1 with probability p and 0 otherwise.
    if bernoulli.rvs(p=p):
        g.add_edge(node1, node2)
nx.draw(g)

plot the degree distribution

def plot_degree_distribution(g):
    """Plot a normalized step histogram of the node degrees of ``g``.

    The histogram is drawn on the current matplotlib axes through ``plt``;
    axis labels and a title are set. Nothing is returned.

    Args:
        g: Graph-like object whose ``degree()`` returns either a
            node -> degree mapping or an iterable of ``(node, degree)``
            pairs.
    """
    # dict() accepts both a plain mapping and an iterable of (node, degree)
    # pairs, so this does not depend on which of the two degree() returns.
    values = list(dict(g.degree()).values())
    # ``density=True`` is the replacement for the ``normed`` keyword, which
    # newer matplotlib releases no longer accept.
    plt.hist(values, histtype='step', density=True)
    plt.xlabel('degree $k$')
    plt.ylabel('frequency $P(k)$')
    plt.title('degree distribution')
plot_degree_distribution(g)

def er_graph(n_nodes, p):
    """Generate an Erdos-Renyi (ER) random graph.

    Every unordered pair of distinct nodes is joined by an edge
    independently with probability ``p``.

    Args:
        n_nodes: Number of nodes; nodes are labelled ``0 .. n_nodes - 1``.
        p: Probability of creating each possible edge, passed to
            ``bernoulli.rvs``.

    Returns:
        The generated ``nx.Graph``.
    """
    g = nx.Graph()
    # Use the caller's arguments: the previous version reassigned n_nodes
    # and p to fixed values here, so the parameters had no effect.
    g.add_nodes_from(range(n_nodes))
    for node1, node2 in itertools.combinations(range(n_nodes), 2):
        # bernoulli.rvs returns 1 with probability p and 0 otherwise.
        if bernoulli.rvs(p=p):
            g.add_edge(node1, node2)
    return g

Descriptive Statistics of Empirical Social Networks

import numpy as np
a1 = np.loadtxt('./adj_allVillageRelationships_vilno_1.csv',delimiter=',')
a2 = np.loadtxt('./adj_allVillageRelationships_vilno_2.csv',delimiter=',')
a1.shape, a2.shape
((843, 843), (877, 877))
g1 = nx.to_networkx_graph(a1)
g2 = nx.to_networkx_graph(a2)
def basic_net_stats(g):
    """Print the node count, edge count and average degree of ``g``.

    Args:
        g: Graph-like object providing ``number_of_nodes()``,
            ``number_of_edges()`` and ``degree()``. ``degree()`` may return
            either a node -> degree mapping or an iterable of
            ``(node, degree)`` pairs.
    """
    print('number of nodes ', g.number_of_nodes())
    print('number of edges ', g.number_of_edges())
    # dict() normalizes both possible degree() return shapes to a mapping
    # before the degrees are averaged.
    degrees = list(dict(g.degree()).values())
    print('average degree ', np.mean(degrees))
basic_net_stats(g1)
number of nodes  843
number of edges  3405
average degree  8.07829181495
basic_net_stats(g2)
number of nodes  877
number of edges  3063
average degree  6.98517673888
plot_degree_distribution(g1) 
# The ER model is not a good fit for human relationship networks in this case.

Largest Connected Component

# Lazily yield one subgraph per connected component of g1.
# nx.connected_component_subgraphs is not available in newer networkx
# releases; connected_components + subgraph is the equivalent spelling.
gen = (g1.subgraph(component) for component in nx.connected_components(g1))
next(gen).number_of_nodes()
825
next(gen).number_of_nodes()
3
next(gen).number_of_nodes()
3
next(gen).number_of_nodes()
4
next(gen).number_of_nodes()
2
next(gen).number_of_nodes()
4
next(gen).number_of_nodes()
1
next(gen).number_of_nodes()
1
next(gen).number_of_nodes()
---------------------------------------------------------------------------

StopIteration                             Traceback (most recent call last)

<ipython-input-80-992d1a5e6975> in <module>()
----> 1 next(gen).number_of_nodes()


StopIteration: 
len(g1) == g1.number_of_nodes()
True
# Largest connected component of g1: pick the biggest node set, then take
# the induced subgraph. This avoids nx.connected_component_subgraphs, which
# newer networkx releases no longer provide.
g1_lcc = g1.subgraph(max(nx.connected_components(g1), key=len)).copy()
plt.figure()
nx.draw(g1_lcc, node_color='red', edge_color='gray', node_size=20)  # drawing a graph this large can take a while

# Largest connected component of g2, built from connected_components +
# subgraph because nx.connected_component_subgraphs is gone from newer
# networkx releases.
g2_lcc = g2.subgraph(max(nx.connected_components(g2), key=len)).copy()
g1_lcc.number_of_nodes(), g2_lcc.number_of_nodes()
(825, 810)
g1_lcc.number_of_nodes()/len(g1), g2_lcc.number_of_nodes()/len(g2)
(0.9786476868327402, 0.9236031927023945)
全部评论

相关推荐

点赞 评论 收藏
分享
后端实习中的&nbsp;“好需求”,核心定义是能支撑面试深度讨论、可向外延伸多维度知识点的需求——&nbsp;本质是能让你在面试官拷打时,有足够空间展现技术积累、解决问题的能力,而非仅完成简单&nbsp;CRUD。结合面试反推逻辑,具体可分为三类,且都具备&nbsp;“可延伸、有讨论点”&nbsp;的共性。本质上是这个需求要支撑你能给面试官吹牛逼。典型的垃圾需求:或许有的同学可能还不理解什么叫做可以吹牛逼的需求,我举一个最简单的反例,很多同学写苍穹外卖的时候,总爱把一个需求写到简历上:&nbsp;&nbsp;基于OSS处理用户上传图片,获取OSS返回URL,实现用户远程上传图片。这就是个最典型的垃圾需求。因为你发现论代码链路,他没什么可讲的。论各种新潮技术,他也...
反装笔大队长:分情况吧。需求分业务需求和技术需求,技术需求你说的是对的。像CRM、OA、NC等等,这些业务系统很多时候对技术要求并不高的,不可否认的是 这些需求还是很不错的。 NC系统的进销存。实际上只是对仓库、库位、库存量、入库出库单价、数据报表等数据的统计与计算。CRM的市场活动、人面画像分析与统计、客户信息管理等,这些无非都是一些增删改查。对于业务需求面试官通常都是问你对业务的理解与过往对该业务的处理方案,并不会死磕技术。技术肯定是多多益善,但在业务开发中 正在有意义的是你的经历。
投递字节跳动等公司10个岗位
点赞 评论 收藏
分享
评论
点赞
收藏
分享

创作者周榜

更多
牛客网
牛客网在线编程
牛客网题解
牛客企业服务