<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=Scrapping%3A_Save_text_setiap_pencarian</id>
	<title>Scrapping: Save text setiap pencarian - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://lms.onnocenter.or.id/wiki/index.php?action=history&amp;feed=atom&amp;title=Scrapping%3A_Save_text_setiap_pencarian"/>
	<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_pencarian&amp;action=history"/>
	<updated>2026-05-03T09:21:46Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.45.1</generator>
	<entry>
		<id>https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_pencarian&amp;diff=72181&amp;oldid=prev</id>
		<title>Unknown user: Created page with &quot;Simpan konten &#039;&#039;&#039;per URL ke file `.txt`&#039;&#039;&#039;, 1 file per halaman   Jadi tiap hasil pencarian disimpan jadi file `.txt` sendiri biar bisa dibaca atau dianalisis dengan mudah.  ==...&quot;</title>
		<link rel="alternate" type="text/html" href="https://lms.onnocenter.or.id/wiki/index.php?title=Scrapping:_Save_text_setiap_pencarian&amp;diff=72181&amp;oldid=prev"/>
		<updated>2025-03-28T23:22:22Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;Simpan konten &amp;#039;&amp;#039;&amp;#039;per URL ke file `.txt`&amp;#039;&amp;#039;&amp;#039;, 1 file per halaman   Jadi tiap hasil pencarian disimpan jadi file `.txt` sendiri biar bisa dibaca atau dianalisis dengan mudah.  ==...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;Simpan konten &amp;#039;&amp;#039;&amp;#039;per URL ke file `.txt`&amp;#039;&amp;#039;&amp;#039;, 1 file per halaman  &lt;br /&gt;
Jadi tiap hasil pencarian disimpan jadi file `.txt` sendiri biar bisa dibaca atau dianalisis dengan mudah.&lt;br /&gt;
&lt;br /&gt;
==FITUR-FITUR:==&lt;br /&gt;
* Baca `keywords.txt` → cari di Google&lt;br /&gt;
* Ambil `top-N` hasil pencarian tiap keyword&lt;br /&gt;
* Kunjungi tiap URL, ambil &amp;#039;&amp;#039;&amp;#039;judul + isi artikel (5 paragraf pertama)&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
* Simpan ke folder `outputs/` dalam format `.txt`:&lt;br /&gt;
** Nama file: `keyword_rank_judul.txt`&lt;br /&gt;
&lt;br /&gt;
==PERSIAPAN:==&lt;br /&gt;
&lt;br /&gt;
 pip install googlesearch-python requests beautifulsoup4&lt;br /&gt;
&lt;br /&gt;
Buat file `keywords.txt`:&lt;br /&gt;
&lt;br /&gt;
 berita teknologi Indonesia&lt;br /&gt;
 politik 2025&lt;br /&gt;
 game PS5 terbaru&lt;br /&gt;
&lt;br /&gt;
==SCRIPT PYTHON SUPER LENGKAP:==&lt;br /&gt;
&lt;br /&gt;
 import os&lt;br /&gt;
 import requests&lt;br /&gt;
 from googlesearch import search&lt;br /&gt;
 from bs4 import BeautifulSoup&lt;br /&gt;
 import re&lt;br /&gt;
 import time&lt;br /&gt;
 &lt;br /&gt;
 def load_keywords(filename):&lt;br /&gt;
     with open(filename, &amp;#039;r&amp;#039;, encoding=&amp;#039;utf-8&amp;#039;) as f:&lt;br /&gt;
         return [line.strip() for line in f if line.strip()]&lt;br /&gt;
 &lt;br /&gt;
 def clean_filename(text):&lt;br /&gt;
     # Hilangkan karakter ilegal untuk nama file&lt;br /&gt;
     return re.sub(r&amp;#039;[\\/*?:&amp;quot;&amp;lt;&amp;gt;|]&amp;#039;, &amp;#039;&amp;#039;, text).strip().replace(&amp;#039; &amp;#039;, &amp;#039;_&amp;#039;)[:50]&lt;br /&gt;
 &lt;br /&gt;
 def get_page_content(url):&lt;br /&gt;
     try:&lt;br /&gt;
         headers = {&lt;br /&gt;
             &amp;quot;User-Agent&amp;quot;: &amp;quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64)&amp;quot;&lt;br /&gt;
         }&lt;br /&gt;
         response = requests.get(url, headers=headers, timeout=10)&lt;br /&gt;
         soup = BeautifulSoup(response.content, &amp;#039;html.parser&amp;#039;)&lt;br /&gt;
         title = soup.title.string if soup.title else &amp;#039;No Title&amp;#039;&lt;br /&gt;
         paragraphs = soup.find_all(&amp;#039;p&amp;#039;)&lt;br /&gt;
         content = &amp;#039;\n\n&amp;#039;.join([p.get_text() for p in paragraphs[:5]])&lt;br /&gt;
         return title.strip(), content.strip()&lt;br /&gt;
     except Exception as e:&lt;br /&gt;
         return &amp;#039;Error&amp;#039;, f&amp;quot;Gagal mengambil konten: {e}&amp;quot;&lt;br /&gt;
 &lt;br /&gt;
 def save_to_txt(keyword, rank, title, url, content, folder=&amp;#039;outputs&amp;#039;):&lt;br /&gt;
     os.makedirs(folder, exist_ok=True)&lt;br /&gt;
     filename = f&amp;quot;{clean_filename(keyword)}_{rank}_{clean_filename(title)}.txt&amp;quot;&lt;br /&gt;
     filepath = os.path.join(folder, filename)&lt;br /&gt;
 &lt;br /&gt;
     with open(filepath, &amp;#039;w&amp;#039;, encoding=&amp;#039;utf-8&amp;#039;) as f:&lt;br /&gt;
         f.write(f&amp;quot;Keyword   : {keyword}\n&amp;quot;)&lt;br /&gt;
         f.write(f&amp;quot;Peringkat : {rank}\n&amp;quot;)&lt;br /&gt;
         f.write(f&amp;quot;Judul     : {title}\n&amp;quot;)&lt;br /&gt;
         f.write(f&amp;quot;URL       : {url}\n\n&amp;quot;)&lt;br /&gt;
         f.write(content)&lt;br /&gt;
 &lt;br /&gt;
 def scrape_and_save_txt(keywords, num_results=5):&lt;br /&gt;
     for keyword in keywords:&lt;br /&gt;
         print(f&amp;quot;\n🔍 Searching: {keyword}&amp;quot;)&lt;br /&gt;
         try:&lt;br /&gt;
             results = search(keyword, num_results=num_results)&lt;br /&gt;
             for i, url in enumerate(results):&lt;br /&gt;
                 print(f&amp;quot;  → ({i+1}) Fetching: {url}&amp;quot;)&lt;br /&gt;
                 title, content = get_page_content(url)&lt;br /&gt;
                 save_to_txt(keyword, i+1, title, url, content)&lt;br /&gt;
                 time.sleep(2)&lt;br /&gt;
         except Exception as e:&lt;br /&gt;
             print(f&amp;quot;❌ Error saat mencari &amp;#039;{keyword}&amp;#039;: {e}&amp;quot;)&lt;br /&gt;
 &lt;br /&gt;
     print(&amp;quot;\n✅ Semua konten telah disimpan di folder &amp;#039;outputs/&amp;#039;&amp;quot;)&lt;br /&gt;
 &lt;br /&gt;
 # Main&lt;br /&gt;
 if __name__ == &amp;#039;__main__&amp;#039;:&lt;br /&gt;
     keywords = load_keywords(&amp;#039;keywords.txt&amp;#039;)&lt;br /&gt;
     scrape_and_save_txt(keywords, num_results=5)&lt;br /&gt;
&lt;br /&gt;
==Output:==&lt;br /&gt;
Folder `outputs/` akan berisi file seperti:&lt;br /&gt;
&lt;br /&gt;
 berita_teknologi_Indonesia_1_Tekno_Terbaru_dari_Tempo.txt&lt;br /&gt;
 politik_2025_2_Media_Indonesia_Politik.txt&lt;br /&gt;
&lt;br /&gt;
Isi filenya:&lt;br /&gt;
&lt;br /&gt;
 Keyword   : berita teknologi Indonesia&lt;br /&gt;
 Peringkat : 1&lt;br /&gt;
 Judul     : Berita Teknologi Terbaru Hari Ini - Tempo&lt;br /&gt;
 URL       : https://tekno.tempo.co/...&lt;br /&gt;
 &lt;br /&gt;
 [ISI PARAGRAF]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Pranala Menarik==&lt;br /&gt;
&lt;br /&gt;
* [[Scrapping]]&lt;/div&gt;</summary>
		<author><name>Unknown user</name></author>
	</entry>
</feed>