Commit 9c2b924d authored by Alexander Alekhin

Merge pull request #11681 from alalek:fix_docs_bs4_spaces

parents 15130f4f 0560747d
...@@ -3,6 +3,7 @@ import sys ...@@ -3,6 +3,7 @@ import sys
import logging import logging
import os import os
import re
from pprint import pprint from pprint import pprint
import traceback import traceback
...@@ -17,12 +18,20 @@ except ImportError: ...@@ -17,12 +18,20 @@ except ImportError:
def load_html_file(file_dir): def load_html_file(file_dir):
""" Uses BeautifulSoup to load an html """ """ Uses BeautifulSoup to load an html """
with open(file_dir, 'rb') as fp: with open(file_dir, 'rb') as fp:
soup = BeautifulSoup(fp, 'html.parser') data = fp.read()
if os.name == 'nt' or sys.version_info[0] == 3:
data = data.decode(encoding='utf-8', errors='strict')
data = re.sub(r'(\>)([ ]+)', lambda match: match.group(1) + ('!space!' * len(match.group(2))), data)
data = re.sub(r'([ ]+)(\<)', lambda match: ('!space!' * len(match.group(1))) + match.group(2), data)
if os.name == 'nt' or sys.version_info[0] == 3:
data = data.encode('utf-8', 'ignore')
soup = BeautifulSoup(data, 'html.parser')
return soup return soup
def update_html(file, soup): def update_html(file, soup):
s = str(soup) s = str(soup)
if os.name == 'nt' or sys.version_info[0] == 3: # if Windows s = s.replace('!space!', ' ')
if os.name == 'nt' or sys.version_info[0] == 3:
s = s.encode('utf-8', 'ignore') s = s.encode('utf-8', 'ignore')
with open(file, 'wb') as f: with open(file, 'wb') as f:
f.write(s) f.write(s)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment