<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=Scrapping%3A_Save_text_setiap_URL</id>
	<title>Scrapping: Save text setiap URL - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=Scrapping%3A_Save_text_setiap_URL"/>
	<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_URL&amp;action=history"/>
	<updated>2026-05-03T09:45:48Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.45.1</generator>
	<entry>
		<id>https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_URL&amp;diff=72179&amp;oldid=prev</id>
		<title>Unknown user: /* Tips: */</title>
		<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_URL&amp;diff=72179&amp;oldid=prev"/>
		<updated>2025-03-28T23:21:04Z</updated>

		<summary type="html">&lt;p&gt;&lt;span class=&quot;autocomment&quot;&gt;Tips:&lt;/span&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 23:21, 28 March 2025&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l75&quot;&gt;Line 75:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 75:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Bisa diubah agar simpan ke `.txt` atau `.json` juga.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Bisa diubah agar simpan ke `.txt` atau `.json` juga.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Mau filter halaman yang &amp;#039;&amp;#039;&amp;#039;bukan berita&amp;#039;&amp;#039;&amp;#039;? Bisa ditambahkan regex atau `if &amp;quot;news&amp;quot; in url`.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Mau filter halaman yang &amp;#039;&amp;#039;&amp;#039;bukan berita&amp;#039;&amp;#039;&amp;#039;? Bisa ditambahkan regex atau `if &amp;quot;news&amp;quot; in url`.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;==Pranala Menarik==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [[Scrapping]]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Unknown user</name></author>
	</entry>
	<entry>
		<id>https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_URL&amp;diff=72178&amp;oldid=prev</id>
		<title>Unknown user: Created page with &quot;==FITUR:== * Input dari `keywords.txt` * Cari tiap keyword di Google (ambil `top-N` URL) * Kunjungi tiap URL dan ambil kontennya (judul + paragraf) * Simpan semua ke `scraped_...&quot;</title>
		<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_URL&amp;diff=72178&amp;oldid=prev"/>
		<updated>2025-03-28T23:16:47Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;==FITUR:== * Input dari `keywords.txt` * Cari tiap keyword di Google (ambil `top-N` URL) * Kunjungi tiap URL dan ambil kontennya (judul + paragraf) * Simpan semua ke `scraped_...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;==FITUR:==&lt;br /&gt;
* Input dari `keywords.txt`&lt;br /&gt;
* Cari tiap keyword di Google (ambil `top-N` URL)&lt;br /&gt;
* Kunjungi tiap URL dan ambil kontennya (judul + paragraf)&lt;br /&gt;
* Simpan semua ke `scraped_results.csv`&lt;br /&gt;
&lt;br /&gt;
==Kebutuhan:==&lt;br /&gt;
&lt;br /&gt;
pip install googlesearch-python requests beautifulsoup4&lt;br /&gt;
&lt;br /&gt;
==SCRIPT FULL:==&lt;br /&gt;
&lt;br /&gt;
 from googlesearch import search&lt;br /&gt;
 import requests&lt;br /&gt;
 from bs4 import BeautifulSoup&lt;br /&gt;
 import csv&lt;br /&gt;
 import time&lt;br /&gt;
 &lt;br /&gt;
 def load_keywords(filename):&lt;br /&gt;
     with open(filename, &amp;#039;r&amp;#039;, encoding=&amp;#039;utf-8&amp;#039;) as f:&lt;br /&gt;
         return [line.strip() for line in f if line.strip()]&lt;br /&gt;
 &lt;br /&gt;
 def get_page_content(url):&lt;br /&gt;
     try:&lt;br /&gt;
         headers = {&lt;br /&gt;
             &amp;quot;User-Agent&amp;quot;: &amp;quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64)&amp;quot;&lt;br /&gt;
         }&lt;br /&gt;
         response = requests.get(url, headers=headers, timeout=10)&lt;br /&gt;
         soup = BeautifulSoup(response.content, &amp;#039;html.parser&amp;#039;) &lt;br /&gt;
 &lt;br /&gt;
         # Ambil judul halaman&lt;br /&gt;
         title = soup.title.string if soup.title else &amp;#039;No Title&amp;#039;&lt;br /&gt;
         &lt;br /&gt;
         # Ambil konten paragraf utama&lt;br /&gt;
         paragraphs = soup.find_all(&amp;#039;p&amp;#039;)&lt;br /&gt;
         text_content = &amp;#039; &amp;#039;.join([p.get_text() for p in paragraphs[:5]])  # Batasi 5 paragraf pertama&lt;br /&gt;
         return title.strip(), text_content.strip()&lt;br /&gt;
 &lt;br /&gt;
     except Exception as e:&lt;br /&gt;
         return &amp;#039;Error&amp;#039;, f&amp;quot;Failed to fetch content: {e}&amp;quot;&lt;br /&gt;
 &lt;br /&gt;
 def google_scrape_with_content(keywords, num_results=5, output_file=&amp;#039;scraped_results.csv&amp;#039;):&lt;br /&gt;
     with open(output_file, mode=&amp;#039;w&amp;#039;, newline=&amp;#039;&amp;#039;, encoding=&amp;#039;utf-8&amp;#039;) as file:&lt;br /&gt;
         writer = csv.writer(file)&lt;br /&gt;
         writer.writerow([&amp;#039;Keyword&amp;#039;, &amp;#039;Rank&amp;#039;, &amp;#039;Title&amp;#039;, &amp;#039;URL&amp;#039;, &amp;#039;Content&amp;#039;]) &lt;br /&gt;
 &lt;br /&gt;
         for keyword in keywords:&lt;br /&gt;
             print(f&amp;quot;\n🔍 Searching for: {keyword}&amp;quot;)&lt;br /&gt;
             try:&lt;br /&gt;
                 results = search(keyword, num_results=num_results)&lt;br /&gt;
                 for i, url in enumerate(results):&lt;br /&gt;
                     print(f&amp;quot;  → Fetching: {url}&amp;quot;)&lt;br /&gt;
                     title, content = get_page_content(url)&lt;br /&gt;
                     writer.writerow([keyword, i+1, title, url, content])&lt;br /&gt;
                     time.sleep(2)  # Delay biar aman&lt;br /&gt;
             except Exception as e:&lt;br /&gt;
                 print(f&amp;quot;❌ Error while searching &amp;#039;{keyword}&amp;#039;: {e}&amp;quot;) &lt;br /&gt;
 &lt;br /&gt;
     print(f&amp;quot;\n✅ All results + content saved to &amp;#039;{output_file}&amp;#039;&amp;quot;)&lt;br /&gt;
 &lt;br /&gt;
 # Main&lt;br /&gt;
 if __name__ == &amp;#039;__main__&amp;#039;:&lt;br /&gt;
     keywords = load_keywords(&amp;#039;keywords.txt&amp;#039;)&lt;br /&gt;
     google_scrape_with_content(keywords, num_results=5)&lt;br /&gt;
&lt;br /&gt;
==Output (`scraped_results.csv`):==&lt;br /&gt;
&lt;br /&gt;
 | Keyword | Rank | Title | URL | Content |&lt;br /&gt;
 |--------|------|-------|-----|---------|&lt;br /&gt;
 | berita teknologi Indonesia | 1 | Judul dari halaman | https://... | Paragraf-paragraf pertama |&lt;br /&gt;
 | ... | ... | ... | ... | ... |&lt;br /&gt;
&lt;br /&gt;
==Tips:==&lt;br /&gt;
* Jangan pakai `num_results &amp;gt; 10` kalau nggak pakai delay besar.&lt;br /&gt;
* Bisa diubah agar simpan ke `.txt` atau `.json` juga.&lt;br /&gt;
* Mau filter halaman yang &amp;#039;&amp;#039;&amp;#039;bukan berita&amp;#039;&amp;#039;&amp;#039;? Bisa ditambahkan regex atau `if &amp;quot;news&amp;quot; in url`.&lt;/div&gt;</summary>
		<author><name>Unknown user</name></author>
	</entry>
</feed>