diff --git a/start.py b/start.py
index 07511a5..3d754b2 100644
--- a/start.py
+++ b/start.py
@@ -43,6 +43,9 @@ class MyThread(QThread):
     def capture_output(self, p):
         while self.running and p.poll() is None:
             output = p.stdout.readline()
+            err = p.stderr.readline()
+            if err:
+                self.update_signal.emit({'msg': err.strip()})
             if output:
                 self.update_signal.emit({'msg': output.strip()})
 
diff --git a/zcspider/spiders/base.py b/zcspider/spiders/base.py
index 186b95d..c19e9bd 100644
--- a/zcspider/spiders/base.py
+++ b/zcspider/spiders/base.py
@@ -26,10 +26,19 @@ class BaseSpider(scrapy.Spider):
         self.name = name
         self.group = group
         self.ext = tuple(['.png', '.jpg', '.jpeg', '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.rar', '.zip', '.ico', '.dat', '.css', '.js', '.mp4', '.m3u', '.flv'])
+        self.resource_content_types = ['image/', 'text/css', 'application/javascript', 'application/octet-stream']
         print(f"Crawl started: {name}_{domain}")
 
+    def fix_url_scheme(self, url, default_scheme='http'):
+        # Check whether the URL already has a scheme
+        if not url.startswith('http://') and not url.startswith('https://'):
+            # If not, prepend the default scheme
+            url = f'{default_scheme}://{url}'
+        return url
+
     def start_requests(self):
         for url in self.start_urls:
+            url = self.fix_url_scheme(url)
             r = scrapy.Request(url, dont_filter=True, headers=self.headers, callback=self.parse, errback=self.request2, meta={'download_timeout': 30})
             yield r
 
@@ -38,10 +47,18 @@ class BaseSpider(scrapy.Spider):
             return True
         return False
 
+    def is_file_res(self, res):
+        content_type = res.headers.get('content-type', b'')
+        if isinstance(content_type, bytes):
+            content_type = content_type.decode('utf-8')
+        return any(content_type.startswith(prefix) for prefix in self.resource_content_types)
+
     def request2(self, fail):
         rurl = fail.request.url
         self.logger.info(f'{rurl} retrying with requests')
         r = requests.get(url=fail.request.url, headers=self.headers, timeout=20)
+        if self.is_file_res(r):
+            return
         if r.status_code < 400:
             rtext = r.text
             h = html2text.HTML2Text()
@@ -58,12 +75,16 @@ class BaseSpider(scrapy.Spider):
             links = re.findall(r'href=["\']?([^"\'>]+)', r.text)
             for link in links:
                 full_link = urljoin(r.url, link)
+                if not full_link.startswith('http'):
+                    continue
                 if full_link not in self.visited_urls and (self.is_file_url(full_link) is False):
                     if urlparse(full_link).netloc.replace('www.', '') == self.domain:
                         yield scrapy.Request(full_link, callback=self.parse, headers=self.headers, errback=self.request2, meta={'download_timeout': 30})
 
     def parse(self, response):
         self.visited_urls.add(response.url)
+        if self.is_file_res(response):
+            return
         h = html2text.HTML2Text()
         h.ignore_links = True  # ignore all links
         # Extract the plain-text content
@@ -80,11 +101,15 @@ class BaseSpider(scrapy.Spider):
             'text': text,
         }
 
-        for link in response.css("a::attr('href')").getall():
+        links = re.findall(r'href=["\']?([^"\'>]+)', response.text)
+
+        for link in links:
             full_link = response.urljoin(link)
+            if not full_link.startswith('http'):
+                continue
             if full_link not in self.visited_urls and (self.is_file_url(full_link) is False):
                 if urlparse(full_link).netloc.replace('www.', '') == self.domain:
                     yield scrapy.Request(full_link, callback=self.parse, headers=self.headers, errback=self.request2, meta={'download_timeout': 30})
 
     def closed(self, reason):
         # This method will be called when the Spider is about to close
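
Note: the URL-scheme and content-type helpers added above can be exercised on their own. Below is a minimal standalone sketch, assuming plain-dict headers and made-up sample URLs (neither appears in the patch itself):

    # Standalone sketch of the helpers introduced in zcspider/spiders/base.py.
    # The plain-dict headers and sample values are illustrative assumptions only.
    RESOURCE_CONTENT_TYPES = ['image/', 'text/css', 'application/javascript', 'application/octet-stream']

    def fix_url_scheme(url, default_scheme='http'):
        # Prepend a default scheme when the URL has none.
        if not url.startswith(('http://', 'https://')):
            url = f'{default_scheme}://{url}'
        return url

    def is_file_res(headers):
        # Treat the response as a static resource when its Content-Type
        # starts with one of the known resource prefixes.
        content_type = headers.get('content-type', b'')
        if isinstance(content_type, bytes):
            content_type = content_type.decode('utf-8')
        return any(content_type.startswith(p) for p in RESOURCE_CONTENT_TYPES)

    if __name__ == '__main__':
        assert fix_url_scheme('example.com/page') == 'http://example.com/page'
        assert fix_url_scheme('https://example.com/') == 'https://example.com/'
        assert is_file_res({'content-type': b'image/png'})
        assert not is_file_res({'content-type': 'text/html; charset=utf-8'})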