From 4dcf1e9282f382bb4082081483dbd5096d2e837f Mon Sep 17 00:00:00 2001 From: Jimmy Xiang Date: Sat, 8 Feb 2020 11:02:28 +0800 Subject: [PATCH] Site updated: 2020-02-08 11:02:27 --- article/about/index.html | 19 -------- article/python-class-01/index.html | 19 -------- article/python-class-02/index.html | 19 -------- article/python-cs224n-01/index.html | 19 -------- article/python-cs224n-02/index.html | 19 -------- article/python-new-01/index.html | 19 -------- article/python-new-02/index.html | 19 -------- article/python-nlp-01/index.html | 66 ++++++++++++++++++--------- article/python-oa-01/index.html | 19 -------- article/python-oa-02/index.html | 19 -------- article/python-oa-03/index.html | 19 -------- article/python-oa-04/index.html | 19 -------- article/python-oa-05/index.html | 19 -------- article/python-tutorial-01/index.html | 19 -------- article/python-tutorial-02/index.html | 19 -------- article/python-tutorial-03/index.html | 19 -------- 16 files changed, 44 insertions(+), 307 deletions(-) diff --git a/article/about/index.html b/article/about/index.html index c0db6df..5d682ed 100644 --- a/article/about/index.html +++ b/article/about/index.html @@ -31,8 +31,6 @@ - - @@ -120,22 +118,5 @@ - - - - - diff --git a/article/python-class-01/index.html b/article/python-class-01/index.html index afaf545..7225f1b 100644 --- a/article/python-class-01/index.html +++ b/article/python-class-01/index.html @@ -31,8 +31,6 @@ - - @@ -179,22 +177,5 @@ - - - - - diff --git a/article/python-class-02/index.html b/article/python-class-02/index.html index 723c326..342e5c2 100644 --- a/article/python-class-02/index.html +++ b/article/python-class-02/index.html @@ -31,8 +31,6 @@ - - @@ -147,22 +145,5 @@ - - - - - diff --git a/article/python-cs224n-01/index.html b/article/python-cs224n-01/index.html index fb42c04..6edcf09 100644 --- a/article/python-cs224n-01/index.html +++ b/article/python-cs224n-01/index.html @@ -31,8 +31,6 @@ - - @@ -162,22 +160,5 @@ - - - - - diff --git 
a/article/python-cs224n-02/index.html b/article/python-cs224n-02/index.html index 9ea6877..af66e11 100644 --- a/article/python-cs224n-02/index.html +++ b/article/python-cs224n-02/index.html @@ -31,8 +31,6 @@ - - @@ -155,22 +153,5 @@ - - - - - diff --git a/article/python-new-01/index.html b/article/python-new-01/index.html index c1d15f0..e19020d 100644 --- a/article/python-new-01/index.html +++ b/article/python-new-01/index.html @@ -31,8 +31,6 @@ - - @@ -167,22 +165,5 @@ - - - - - diff --git a/article/python-new-02/index.html b/article/python-new-02/index.html index aa12759..9f1330e 100644 --- a/article/python-new-02/index.html +++ b/article/python-new-02/index.html @@ -31,8 +31,6 @@ - - @@ -144,22 +142,5 @@ - - - - - diff --git a/article/python-nlp-01/index.html b/article/python-nlp-01/index.html index b01050f..d30537e 100644 --- a/article/python-nlp-01/index.html +++ b/article/python-nlp-01/index.html @@ -31,8 +31,6 @@ - - @@ -111,10 +109,51 @@
  • 编写python代码并运行
  • 展示词云结果
  • -

    1.安装wordcloud

    可以在cmd窗口输入

    pip install wordcloud matplotlib
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30

    ![](http://jcjview.github.io/img/wordcloud001.png)


    ## 2.准备红楼梦文本

    文本可以用下面链接下载


    `https://github.com/flypythoncom/flypython/blob/master/wordcloud_hlm_seg.txt`

    或者可以自己写代码,对文本进行清洗,分词。
    这里需要安装jieba分词,`pip install jieba`
    ``` python
    import jieba
    import re

    # Punctuation and symbols removed from every line before segmentation.
    # The hyphen is escaped on purpose: left bare, "^-—" is parsed as a
    # character range from "^" to "—", which also matches ASCII letters.
    special_character_removal = re.compile(r'[,。、【 】“”:;()《》‘’{}?!⑦%>℃.^\-——=&#@¥『』]', re.IGNORECASE)

    # Read hlm.txt line by line and write one segmented line per input line to
    # hlm_seg.txt, joining the pieces produced by jieba.cut with single spaces.
    # Both files are managed by the with-statement, so they are closed even if
    # segmentation raises part-way through.
    with open('hlm.txt', encoding="utf-8") as fp, \
            open("hlm_seg.txt", "w", encoding="utf-8") as fw:
        for line in fp:
            cleaned = special_character_removal.sub('', line.strip())
            fw.write(" ".join(jieba.cut(cleaned)))
            fw.write("\n")

    -

    3. 编写词云python代码并运行

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    from os import path

    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    # Directory containing this script; the segmented text is read from it.
    d = path.dirname(__file__)

    # Read the whole text. The with-statement closes the file handle, which
    # a bare open(...).read() leaves to the garbage collector.
    with open(path.join(d, 'hlm_seg.txt'), encoding="utf-8") as fp:
        text = fp.read()

    # Generate a word cloud image.
    # Alternative: keep the font next to the script,
    #   font = path.join(d, "simkai.ttf")
    font = 'C:/Windows/Fonts/simkai.ttf'
    wordcloud = WordCloud(
        font_path=font,  # Chinese font; without it Chinese text is not displayed
        width=1024,  # width
        height=840,  # height
        background_color='white',  # background color
        # max_words=100,  # maximum number of words
        # max_font_size=100,  # largest font size
    ).generate(text)

    # Display the generated image the matplotlib way.
    plt.figure()
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
    +

    1.安装wordcloud

    可以在cmd窗口输入

    +

    pip install wordcloud matplotlib

    +

    +

    2.准备红楼梦文本

    文本可以用下面链接下载

    +

    https://github.com/flypythoncom/flypython/blob/master/wordcloud_hlm_seg.txt

    +

    或者可以自己写代码,对文本进行清洗,分词。
    这里需要安装jieba分词,pip install jieba

    +
    import jieba
    +import re
     
    -

    结果:

    +special_character_removal = re.compile(r'[,。、【 】“”:;()《》‘’{}?!⑦%>℃.^-——=&#@¥『』]', re.IGNORECASE) + +fw=open("hlm_seg.txt","w",encoding="utf-8") + +with open('hlm.txt',encoding="utf-8") as fp: + for line in fp: + l = special_character_removal.sub('', line.strip()) + words=jieba.cut(l) + t=" ".join(words) + fw.write(t) + fw.write("\n") +fw.close()

    3. 编写词云python代码并运行

    from os import path  
    +from wordcloud import WordCloud
    +
    +d = path.dirname(__file__)  
    +# Read the whole text.  
    +text = open(path.join(d, 'hlm_seg.txt'),encoding="utf-8").read()  
    +# Generate a word cloud image  
    +# font=path.join(d, "simkai.ttf")  
    +font='C:/Windows/Fonts/simkai.ttf'  
    +wordcloud = WordCloud(font_path=font,#设置中文字体,不指定就会出现中文不显示  
    +  width=1024,#宽  
    +  height=840,#高  
    +  background_color='white',#设置背景色   
    +  # max_words=100,#最大词汇数  
    +  # max_font_size=100#最大号字体  
    +  ).generate(text)  
    +
    +# Display the generated image:  
    +# the matplotlib way:  
    +import matplotlib.pyplot as plt  
    +
    +plt.figure()  
    +plt.imshow(wordcloud)  
    +plt.axis("off")  
    +plt.show()

    结果:

    词云运行结果

    后台回复“词云”获得完整运行代码

    人生苦短,我用python早下班。如果觉得不错,对你工作中有帮助,请加我微信公众号flypython,我们一起探讨python相关问题

    @@ -136,22 +175,5 @@ - - - - - diff --git a/article/python-oa-01/index.html b/article/python-oa-01/index.html index b36a9f6..94d7457 100644 --- a/article/python-oa-01/index.html +++ b/article/python-oa-01/index.html @@ -31,8 +31,6 @@ - - @@ -153,22 +151,5 @@ - - - - - diff --git a/article/python-oa-02/index.html b/article/python-oa-02/index.html index c86cf43..1db1964 100644 --- a/article/python-oa-02/index.html +++ b/article/python-oa-02/index.html @@ -31,8 +31,6 @@ - - @@ -208,22 +206,5 @@ - - - - - diff --git a/article/python-oa-03/index.html b/article/python-oa-03/index.html index 59bf73d..254c352 100644 --- a/article/python-oa-03/index.html +++ b/article/python-oa-03/index.html @@ -31,8 +31,6 @@ - - @@ -143,22 +141,5 @@ - - - - - diff --git a/article/python-oa-04/index.html b/article/python-oa-04/index.html index 5531825..669c965 100644 --- a/article/python-oa-04/index.html +++ b/article/python-oa-04/index.html @@ -31,8 +31,6 @@ - - @@ -153,22 +151,5 @@ - - - - - diff --git a/article/python-oa-05/index.html b/article/python-oa-05/index.html index bfc7dd2..50269ea 100644 --- a/article/python-oa-05/index.html +++ b/article/python-oa-05/index.html @@ -31,8 +31,6 @@ - - @@ -138,22 +136,5 @@ - - - - - diff --git a/article/python-tutorial-01/index.html b/article/python-tutorial-01/index.html index d522397..d05f756 100644 --- a/article/python-tutorial-01/index.html +++ b/article/python-tutorial-01/index.html @@ -31,8 +31,6 @@ - - @@ -153,22 +151,5 @@ - - - - - diff --git a/article/python-tutorial-02/index.html b/article/python-tutorial-02/index.html index 0e1be01..989e489 100644 --- a/article/python-tutorial-02/index.html +++ b/article/python-tutorial-02/index.html @@ -31,8 +31,6 @@ - - @@ -158,22 +156,5 @@ - - - - - diff --git a/article/python-tutorial-03/index.html b/article/python-tutorial-03/index.html index d09b064..3da5131 100644 --- a/article/python-tutorial-03/index.html +++ b/article/python-tutorial-03/index.html @@ -31,8 +31,6 @@ - - @@ -228,22 +226,5 
@@ - - - - -