sample
[py]
# coding: UTF-8
from bs4 import BeautifulSoup
# Minimal HTML document used as parser input.
html = '''<html><body>
<h1>Title</h1>
<p>Hello, I'm John.</p>
</body></html>'''

# Parse with the parser bundled in the standard library ("html.parser").
soup = BeautifulSoup(html, "html.parser")

# soup.p is the first <p> element found in the document.
print(type(soup.p)) #=> <class 'bs4.element.Tag'>
print(soup.p.name) #=> 'p'
print(soup.p.text) #=> 'Hello, I'm John.'
[/py]
sample
[py]
from bs4 import BeautifulSoup
# Minimal HTML document containing two hyperlinks.
html = '''<html><body>
<h1>Title</h1>
<a href="http://google.com/">Google</a>
<a href="http://yahoo.com/">Yahoo</a>
</body></html>'''
soup = BeautifulSoup(html, "html.parser")

# Collect every <a> element, then print "<text> : <href>" for each one
# that actually carries an href attribute.
links = soup.find_all('a')
for link in links:
    if 'href' in link.attrs:
        print(link.text, ':', link.attrs['href'])
[/py]
コメント