FAQ Search Today's Posts Mark Forums Read
» Video Reviews

» Linux Archive

Linux-archive is a website aiming to archive linux email lists and to make them easily accessible for linux users/developers.


» Sponsor

» Partners

» Sponsor

Go Back   Linux Archive > ArchLinux > ArchLinux Pacman Development

 
 
LinkBack Thread Tools
 
Old 07-17-2011, 12:58 AM
Allan McRae
 
Default Handle removal of empty directories properly

On 17/07/11 01:11, Dan McGee wrote:

This addresses FS#25141. We shouldn't remove every empty directory we
come across during the removal process unless it is truly not known to
any other package. This will prevent removal of essential directories
such as '/var/lock/'.

This is accomplished by first checking the empty/non-empty status of a
directory, which was previously done implicitly by calling rmdir() and
ignoring errors. We do this to avoid the next (new) check in most cases,
which is to look at all local packages to see if the to-be-removed
directory is present in another packages' filelist. If we do not find it
anywhere, then we remove it, else we keep the file around.

The pactest has been updated to test more cases, as well as finding a
flaw in the original expected to fail case- we need separate DIR and
FILE based EXIST rules.

Signed-off-by: Dan McGee<dan@archlinux.org>
---


Looks fine. A couple of minor comments below.


lib/libalpm/remove.c | 41 ++++++++++++++++++++++++++++++++++-----
lib/libalpm/util.c | 29 ++++++++++++++++++++++++++++
lib/libalpm/util.h | 1 +
test/pacman/pmrule.py | 8 +++++++
test/pacman/tests/remove070.py | 24 +++++++++++++++-------
5 files changed, 89 insertions(+), 14 deletions(-)

diff --git a/lib/libalpm/remove.c b/lib/libalpm/remove.c
index b15dbaa..b946abc 100644
--- a/lib/libalpm/remove.c
+++ b/lib/libalpm/remove.c
@@ -232,8 +232,8 @@ static void unlink_file(alpm_handle_t *handle, alpm_pkg_t *info,
* see the big comment block in db_find_fileconflicts() for an
* explanation. */
if(alpm_list_find_str(skip_remove, fileobj->name)) {
- _alpm_log(handle, ALPM_LOG_DEBUG, "%s is in skip_remove, skipping removal
",
- file);
+ _alpm_log(handle, ALPM_LOG_DEBUG,
+ "%s is in skip_remove, skipping removal
", file);
return;
}

@@ -247,11 +247,40 @@ static void unlink_file(alpm_handle_t *handle, alpm_pkg_t *info,
}

if(S_ISDIR(buf.st_mode)) {
- if(rmdir(file)) {
- /* this is okay, other packages are probably using it (like /usr) */
- _alpm_log(handle, ALPM_LOG_DEBUG, "keeping directory %s
", file);
+ ssize_t filecount = _alpm_files_in_directory(handle, file);
+ /* if we have files, no need to remove the directory */
+ if(filecount> 0) {
+ _alpm_log(handle, ALPM_LOG_DEBUG, "keeping directory %s (%zd files)
",
+ file, filecount);
+ } else if(filecount< 0) {
+ _alpm_log(handle, ALPM_LOG_DEBUG,
+ "keeping directory %s (could not count files)
", file);
} else {
- _alpm_log(handle, ALPM_LOG_DEBUG, "removing directory %s
", file);
+ /* one last check- does any other package own this file? */
+ alpm_list_t *local, *local_pkgs;
+ int found = 0;
+ local_pkgs = _alpm_db_get_pkgcache(handle->db_local);
+ for(local = local_pkgs; local&& !found; local = local->next) {
+ alpm_pkg_t *local_pkg = local->data;
+ alpm_list_t *files;
+
+ /* we duplicated the package when we put it in the removal list, so we
+ * so we can't use direct pointer comparison here. */
+ if(_alpm_pkg_cmp(info, local_pkg) == 0) {
+ continue;
+ }
+ files = alpm_pkg_get_files(local_pkg);
+ if(_alpm_filelist_contains(files, fileobj->name)) {
+ _alpm_log(handle, ALPM_LOG_DEBUG,
+ "keeping directory %s (owned by %s)
", file, local_pkg->name);
+ found = 1;


break;


+ }
+ }
+ if(!found) {
+ rmdir(file);
+ _alpm_log(handle, ALPM_LOG_DEBUG,
+ "removing directory %s (no remaining owners)
", file);
+ }
}
} else {
/* if the file needs backup and has been modified, back it up to .pacsave */
diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
index cd7f19c..af2db76 100644
--- a/lib/libalpm/util.c
+++ b/lib/libalpm/util.c
@@ -388,6 +388,35 @@ int _alpm_rmrf(const char *path)
return 0;
}

+ssize_t _alpm_files_in_directory(alpm_handle_t *handle, const char *path)
+{
+ ssize_t files = 0;
+ struct dirent *ent;
+ DIR *dir = opendir(path);
+
+ if(!dir) {
+ if(errno == ENOTDIR) {
+ _alpm_log(handle, ALPM_LOG_DEBUG, "%s was not a directory
", path);
+ } else {
+ _alpm_log(handle, ALPM_LOG_DEBUG, "could not read directory %s
",
+ path);
+ }
+ return -1;
+ }
+ while((ent = readdir(dir)) != NULL) {
+ const char *name = ent->d_name;
+
+ if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
+ continue;
+ }
+
+ files++;


Do we need to keep counting? i.e. can we just return -1 error, 0 none,
1 some?



+ }
+
+ closedir(dir);
+ return files;
+}
+
int _alpm_logaction(alpm_handle_t *handle, const char *fmt, va_list args)
{
int ret = 0;
diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
index 450dac9..2b71c5c 100644
--- a/lib/libalpm/util.h
+++ b/lib/libalpm/util.h
@@ -100,6 +100,7 @@ int _alpm_unpack_single(alpm_handle_t *handle, const char *archive,
int _alpm_unpack(alpm_handle_t *handle, const char *archive, const char *prefix,
alpm_list_t *list, int breakfirst);
int _alpm_rmrf(const char *path);
+ssize_t _alpm_files_in_directory(alpm_handle_t *handle, const char *path);
int _alpm_logaction(alpm_handle_t *handle, const char *fmt, va_list args);
int _alpm_run_chroot(alpm_handle_t *handle, const char *path, char *const argv[]);
int _alpm_ldconfig(alpm_handle_t *handle);
diff --git a/test/pacman/pmrule.py b/test/pacman/pmrule.py
index 0f6ae60..cb7ae88 100644
--- a/test/pacman/pmrule.py
+++ b/test/pacman/pmrule.py
@@ -146,6 +146,14 @@ def check(self, test):
else:
print "FILE rule '%s' not found" % case
success = -1
+ elif kind == "DIR":
+ filename = os.path.join(test.root, key)
+ if case == "EXIST":
+ if not os.path.isdir(filename):
+ success = 0
+ else:
+ print "DIR rule '%s' not found" % case
+ success = -1
elif kind == "LINK":
filename = os.path.join(test.root, key)
if case == "EXIST":
diff --git a/test/pacman/tests/remove070.py b/test/pacman/tests/remove070.py
index e0587e1..898e2f5 100644
--- a/test/pacman/tests/remove070.py
+++ b/test/pacman/tests/remove070.py
@@ -1,21 +1,29 @@
-self.description = "Remove a package with an empty directory needed by another package"
+self.description = "Remove a package with various directory overlaps"
+
+self.filesystem = ["lib/alsoonfs/randomfile"]

p1 = pmpkg("pkg1")
-p1.files = ["bin/pkg1", "opt/"]
+p1.files = ["bin/pkg1",
+ "opt/",
+ "lib/onlyinp1/",
+ "lib/alsoonfs/"]

p2 = pmpkg("pkg2")
-p2.files = ["bin/pkg2", "opt/"]
+p2.files = ["bin/pkg2",
+ "opt/",
+ "lib/onlyinp2/"]

for p in p1, p2:
- self.addpkg2db("local", p)
+ self.addpkg2db("local", p)

self.args = "-R %s" % p1.name

self.addrule("PACMAN_RETCODE=0")
self.addrule("!PKG_EXIST=pkg1")
self.addrule("PKG_EXIST=pkg2")
-self.addrule("!FILE_EXIST=bin/pkg1")
-self.addrule("FILE_EXIST=bin/pkg2")
-self.addrule("FILE_EXIST=opt/")

-self.expectfailure = True
+self.addrule("DIR_EXIST=bin/")
+self.addrule("DIR_EXIST=opt/")
+self.addrule("!DIR_EXIST=lib/onlyinp1/")
+self.addrule("DIR_EXIST=lib/onlyinp2/")
+self.addrule("DIR_EXIST=lib/alsoonfs/")
 
Old 07-17-2011, 09:29 AM
Xavier Chantry
 
Default Handle removal of empty directories properly

On Sat, Jul 16, 2011 at 5:11 PM, Dan McGee <dan@archlinux.org> wrote:
> This addresses FS#25141. We shouldn't remove every empty directory we
> come across during the removal process unless it is truly not known to
> any other package. This will prevent removal of essential directories
> such as '/var/lock/'.
>
> This is accomplished by first checking the empty/non-empty status of a
> directory, which was previously done implicitly by calling rmdir() and
> ignoring errors. We do this to avoid the next (new) check in most cases,
> which is to look at all local packages to see if the to-be-removed
> directory is present in another packages' filelist. If we do not find it
> anywhere, then we remove it, else we keep the file around.
>

I always thought pacman should do that, but Nagy concerns were
obviously right, loading /var/lib/pacman/local/*/files takes time.
This just highlights once more the poor design of the local db.

pacman -R teeworlds goes from 2sec to 12sec here (after dropping caches)

... and from 0.4sec to 0.7sec when everything is cached. But only a
pacman -Qo 'orphaned_file' or a previous -R operation would load all
/var/lib/pacman/local/*/files

Just to make myself clear, I think this patch should go in, and the
local db out
In the meantime, we have filesystem loops fortunately.
 
Old 07-18-2011, 03:27 PM
Dan McGee
 
Default Handle removal of empty directories properly

On Sun, Jul 17, 2011 at 4:29 AM, Xavier Chantry
<chantry.xavier@gmail.com> wrote:
> On Sat, Jul 16, 2011 at 5:11 PM, Dan McGee <dan@archlinux.org> wrote:
>> This addresses FS#25141. We shouldn't remove every empty directory we
>> come across during the removal process unless it is truly not known to
>> any other package. This will prevent removal of essential directories
>> such as '/var/lock/'.
>>
>> This is accomplished by first checking the empty/non-empty status of a
>> directory, which was previously done implicitly by calling rmdir() and
>> ignoring errors. We do this to avoid the next (new) check in most cases,
>> which is to look at all local packages to see if the to-be-removed
>> directory is present in another packages' filelist. If we do not find it
>> anywhere, then we remove it, else we keep the file around.
>>
>
> I always thought pacman should do that, but Nagy concerns were
> obviously right, loading /var/lib/pacman/local/*/files takes time.
> This just highlights once more the poor design of the local db.
>
> pacman -R teeworlds goes from 2sec to 12sec here (after dropping caches)
Haha, I haven't done the "drop caches" checks in quite some time. I've
mostly conceded the fact that this will be slower, and it isn't all
that common. Did you run pacman-db-optimize first?

> ... and from 0.4sec to 0.7sec when everything is cached. But only a
> pacman -Qo 'orphaned_file' or a previous -R operation would load all
> /var/lib/pacman/local/*/files
Or the much more common case, which is an -S operation, where we
definitely load quite a few local filelists. Sure, not all of them,
but still a good chunk on an -Syu.

> Just to make myself clear, I think this patch should go in, and the
> local db out
> In the meantime, we have filesystem loops fortunately.
Yes, I feel like including something like this will make us take a
closer look at the local DB format in the future. Allan and I have
already looked into some things so we can do full package integrity
checking on the system (modes, sizes, checksums, etc.)

-Dan
 

Thread Tools




All times are GMT. The time now is 10:55 PM.

VBulletin, Copyright ©2000 - 2014, Jelsoft Enterprises Ltd.
Content Relevant URLs by vBSEO ©2007, Crawlability, Inc.
Copyright 2007 - 2008, www.linux-archive.org